From a285fe05fd7bfcca3953976e7b3356dc39b020a7 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 14 Feb 2018 13:45:30 +0000
Subject: [PATCH 001/278] Add timeout to ResponseCache of /public_rooms

---
 synapse/handlers/room_list.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py
index fc507cef367..878db0db1e0 100644
--- a/synapse/handlers/room_list.py
+++ b/synapse/handlers/room_list.py
@@ -46,9 +46,12 @@
 class RoomListHandler(BaseHandler):
     def __init__(self, hs):
         super(RoomListHandler, self).__init__(hs)
-        self.response_cache = ResponseCache(hs, "room_list")
-        self.remote_response_cache = ResponseCache(hs, "remote_room_list",
-                                                   timeout_ms=30 * 1000)
+        self.response_cache = ResponseCache(
+            hs, "room_list", timeout_ms=10 * 60 * 1000,
+        )
+        self.remote_response_cache = ResponseCache(
+            hs, "remote_room_list", timeout_ms=30 * 1000,
+        )
 
     def get_local_public_room_list(self, limit=None, since_token=None,
                                    search_filter=None,
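
For context, a rough sketch of what the new timeout buys us (a simplified illustration assuming a non-zero timeout_ms; this is not Synapse's actual ResponseCache implementation): once a /publicRooms response has been computed it is kept for timeout_ms, so identical requests arriving within that window are served from the cache instead of recomputing the expensive room list.

    import time

    class SimpleResponseCache(object):
        """Illustrative only: keep finished responses around for timeout_ms."""

        def __init__(self, timeout_ms):
            self.timeout_ms = timeout_ms
            self._cache = {}  # key -> (expiry_ms, response)

        def get(self, key):
            entry = self._cache.get(key)
            if entry is None:
                return None
            expiry_ms, response = entry
            if time.time() * 1000 > expiry_ms:
                del self._cache[key]
                return None
            return response

        def set(self, key, response):
            self._cache[key] = (time.time() * 1000 + self.timeout_ms, response)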

From 66dcbf47a36b5ca0e88d4658578d6fb5e6dbd910 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 14 Feb 2018 13:47:14 +0000
Subject: [PATCH 002/278] Disable auto search for prefixes in event search

---
 synapse/storage/search.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/storage/search.py b/synapse/storage/search.py
index f0fa5d76319..ef4f587d8c3 100644
--- a/synapse/storage/search.py
+++ b/synapse/storage/search.py
@@ -722,7 +722,7 @@ def _parse_query(database_engine, search_term):
     results = re.findall(r"([\w\-]+)", search_term, re.UNICODE)
 
     if isinstance(database_engine, PostgresEngine):
-        return " & ".join(result + ":*" for result in results)
+        return " & ".join(result for result in results)
     elif isinstance(database_engine, Sqlite3Engine):
         return " & ".join(result + "*" for result in results)
     else:
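
To illustrate the effect (assuming standard Postgres to_tsquery syntax, where a trailing ":*" turns a lexeme into a prefix match): the old code turned every search word into a prefix query, which is much more expensive against a large event search index; the new code matches whole lexemes only. A minimal sketch of the two behaviours:

    import re

    def parse_query_old(search_term):
        # old behaviour: every word becomes a prefix match
        words = re.findall(r"([\w\-]+)", search_term, re.UNICODE)
        return " & ".join(word + ":*" for word in words)

    def parse_query_new(search_term):
        # new behaviour: whole-lexeme matches only
        words = re.findall(r"([\w\-]+)", search_term, re.UNICODE)
        return " & ".join(words)

    print(parse_query_old("hello wor"))  # hello:* & wor:*
    print(parse_query_new("hello wor"))  # hello & wor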

From 43bb12e640ce1563541b246bffe09870d43892ea Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 5 May 2017 11:02:34 +0100
Subject: [PATCH 003/278] Disable presence

This reverts commit 0ebd376a53bb75ade6d65db2d716478758c2c9f0 and
disables presence a bit more
---
 synapse/app/synchrotron.py         |  1 +
 synapse/handlers/initial_sync.py   |  1 +
 synapse/handlers/presence.py       |  3 +++
 synapse/handlers/sync.py           |  2 +-
 synapse/rest/client/v1/presence.py |  2 +-
 tests/rest/client/v1/test_rooms.py | 12 +++++++-----
 6 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/synapse/app/synchrotron.py b/synapse/app/synchrotron.py
index 82f06ea185e..3cade3c68eb 100644
--- a/synapse/app/synchrotron.py
+++ b/synapse/app/synchrotron.py
@@ -116,6 +116,7 @@ def __init__(self, hs):
         logger.info("Presence process_id is %r", self.process_id)
 
     def send_user_sync(self, user_id, is_syncing, last_sync_ms):
+        return
         self.hs.get_tcp_replication().send_user_sync(user_id, is_syncing, last_sync_ms)
 
     def mark_as_coming_online(self, user_id):
diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py
index 71af86fe213..5242309c1ea 100644
--- a/synapse/handlers/initial_sync.py
+++ b/synapse/handlers/initial_sync.py
@@ -373,6 +373,7 @@ def _room_initial_sync_joined(self, user_id, room_id, pagin_config,
 
         @defer.inlineCallbacks
         def get_presence():
+            defer.returnValue([])
             states = yield presence_handler.get_states(
                 [m.user_id for m in room_members],
                 as_event=True,
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index 7fe568132fb..c7336020a25 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -391,6 +391,7 @@ def bump_presence_active_time(self, user):
         """We've seen the user do something that indicates they're interacting
         with the app.
         """
+        return
         user_id = user.to_string()
 
         bump_active_time_counter.inc()
@@ -420,6 +421,7 @@ def user_syncing(self, user_id, affect_presence=True):
                 Useful for streams that are not associated with an actual
                 client that is being used by a user.
         """
+        affect_presence = False
         if affect_presence:
             curr_sync = self.user_to_num_current_syncs.get(user_id, 0)
             self.user_to_num_current_syncs[user_id] = curr_sync + 1
@@ -465,6 +467,7 @@ def get_currently_syncing_users(self):
         Returns:
             set(str): A set of user_id strings.
         """
+        return set()
         syncing_user_ids = {
             user_id for user_id, count in self.user_to_num_current_syncs.items()
             if count
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 51ec727df06..1e8a50514b4 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -620,7 +620,7 @@ def generate_sync_result(self, sync_config, since_token=None, full_state=False):
             since_token is None and
             sync_config.filter_collection.blocks_all_presence()
         )
-        if not block_all_presence_data:
+        if False and not block_all_presence_data:
             yield self._generate_sync_entry_for_presence(
                 sync_result_builder, newly_joined_rooms, newly_joined_users
             )
diff --git a/synapse/rest/client/v1/presence.py b/synapse/rest/client/v1/presence.py
index 647994bd53d..a975666d128 100644
--- a/synapse/rest/client/v1/presence.py
+++ b/synapse/rest/client/v1/presence.py
@@ -83,7 +83,7 @@ def on_PUT(self, request, user_id):
         except Exception:
             raise SynapseError(400, "Unable to parse state")
 
-        yield self.presence_handler.set_state(user, state)
+        # yield self.presence_handler.set_state(user, state)
 
         defer.returnValue((200, {}))
 
diff --git a/tests/rest/client/v1/test_rooms.py b/tests/rest/client/v1/test_rooms.py
index 61d737725be..5bc0ba19347 100644
--- a/tests/rest/client/v1/test_rooms.py
+++ b/tests/rest/client/v1/test_rooms.py
@@ -984,11 +984,13 @@ def test_initial_sync(self):
 
         self.assertTrue("presence" in response)
 
-        presence_by_user = {
-            e["content"]["user_id"]: e for e in response["presence"]
-        }
-        self.assertTrue(self.user_id in presence_by_user)
-        self.assertEquals("m.presence", presence_by_user[self.user_id]["type"])
+        # presence is turned off on hotfixes
+
+        # presence_by_user = {
+        #     e["content"]["user_id"]: e for e in response["presence"]
+        # }
+        # self.assertTrue(self.user_id in presence_by_user)
+        # self.assertEquals("m.presence", presence_by_user[self.user_id]["type"])
 
 
 class RoomMessageListTestCase(RestTestCase):

From e5537cf98333b428dbc481ed443daed2f0cfa074 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 23 May 2017 10:48:13 +0100
Subject: [PATCH 004/278] Limit concurrent AS joins

---
 synapse/handlers/room_member.py | 32 +++++++++++++++++++-------------
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index f930e939e81..5e351df8aa4 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -31,7 +31,7 @@
 )
 from synapse.api.errors import AuthError, SynapseError, Codes
 from synapse.types import UserID, RoomID
-from synapse.util.async import Linearizer
+from synapse.util.async import Linearizer, Limiter
 from synapse.util.distributor import user_left_room, user_joined_room
 
 
@@ -68,6 +68,7 @@ def __init__(self, hs):
         self.event_creation_hander = hs.get_event_creation_handler()
 
         self.member_linearizer = Linearizer(name="member")
+        self.member_limiter = Limiter(3)
 
         self.clock = hs.get_clock()
         self.spam_checker = hs.get_spam_checker()
@@ -241,18 +242,23 @@ def update_membership(
     ):
         key = (room_id,)
 
-        with (yield self.member_linearizer.queue(key)):
-            result = yield self._update_membership(
-                requester,
-                target,
-                room_id,
-                action,
-                txn_id=txn_id,
-                remote_room_hosts=remote_room_hosts,
-                third_party_signed=third_party_signed,
-                ratelimit=ratelimit,
-                content=content,
-            )
+        as_id = object()
+        if requester.app_service:
+            as_id = requester.app_service.id
+
+        with (yield self.member_limiter.queue(as_id)):
+            with (yield self.member_linearizer.queue(key)):
+                result = yield self._update_membership(
+                    requester,
+                    target,
+                    room_id,
+                    action,
+                    txn_id=txn_id,
+                    remote_room_hosts=remote_room_hosts,
+                    third_party_signed=third_party_signed,
+                    ratelimit=ratelimit,
+                    content=content,
+                )
 
         defer.returnValue(result)
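
Roughly, the intent here (a sketch written with asyncio for brevity, under the assumption that synapse.util.async.Limiter behaves like a per-key counting semaphore, which is how it is used above): each application service may only have a few membership updates in flight at once, while requests that do not come from an AS get a fresh object() as their key and so are never queued behind one another.

    import asyncio

    class KeyedLimiter(object):
        """Sketch: allow at most max_count concurrent holders per key."""

        def __init__(self, max_count):
            self.max_count = max_count
            self._semaphores = {}

        def queue(self, key):
            return self._semaphores.setdefault(
                key, asyncio.Semaphore(self.max_count)
            )

    async def update_membership(limiter, as_id):
        # non-AS requests would pass a unique object() here instead of as_id
        async with limiter.queue(as_id):
            pass  # ... perform the actual membership update ...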
 

From e6b1ea3eb20dcf60e1d460716bb658f8935de8d6 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 14 Feb 2018 13:52:29 +0000
Subject: [PATCH 005/278] Disable presence in txn queue

---
 synapse/federation/transaction_queue.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py
index d72b057e282..b9d0639a1b9 100644
--- a/synapse/federation/transaction_queue.py
+++ b/synapse/federation/transaction_queue.py
@@ -303,6 +303,7 @@ def send_presence(self, states):
         Args:
             states (list(UserPresenceState))
         """
+        return
 
         # First we queue up the new presence by user ID, so multiple presence
         # updates in quick successtion are correctly handled

From 1766a5fdc09da970b44477690b8f1825e1265fb4 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 14 Feb 2018 13:53:02 +0000
Subject: [PATCH 006/278] Increase MAX_EVENTS_BEHIND for replication clients

---
 synapse/replication/tcp/streams.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/replication/tcp/streams.py b/synapse/replication/tcp/streams.py
index 4c60bf79f98..07d99dd63c2 100644
--- a/synapse/replication/tcp/streams.py
+++ b/synapse/replication/tcp/streams.py
@@ -33,7 +33,7 @@
 logger = logging.getLogger(__name__)
 
 
-MAX_EVENTS_BEHIND = 10000
+MAX_EVENTS_BEHIND = 500000
 
 
 EventStreamRow = namedtuple("EventStreamRow", (

From a79823e64b0aa271e085d3fa8e29724b4b752cfe Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 14 Feb 2018 12:17:14 +0000
Subject: [PATCH 007/278] Add dummy presence REST handler to frontend proxy

The handler no-ops all requests as presence is disabled.
---
 synapse/app/frontend_proxy.py | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/synapse/app/frontend_proxy.py b/synapse/app/frontend_proxy.py
index d2bae4ad031..e6db28333fb 100644
--- a/synapse/app/frontend_proxy.py
+++ b/synapse/app/frontend_proxy.py
@@ -37,6 +37,7 @@
 from synapse.replication.slave.storage.devices import SlavedDeviceStore
 from synapse.replication.slave.storage.registration import SlavedRegistrationStore
 from synapse.replication.tcp.client import ReplicationClientHandler
+from synapse.rest.client.v1.base import ClientV1RestServlet, client_path_patterns
 from synapse.rest.client.v2_alpha._base import client_v2_patterns
 from synapse.server import HomeServer
 from synapse.storage.engines import create_engine
@@ -50,6 +51,35 @@
 logger = logging.getLogger("synapse.app.frontend_proxy")
 
 
+class PresenceStatusStubServlet(ClientV1RestServlet):
+    PATTERNS = client_path_patterns("/presence/(?P<user_id>[^/]*)/status")
+
+    def __init__(self, hs):
+        super(PresenceStatusStubServlet, self).__init__(hs)
+        self.http_client = hs.get_simple_http_client()
+        self.auth = hs.get_auth()
+        self.main_uri = hs.config.worker_main_http_uri
+
+    @defer.inlineCallbacks
+    def on_GET(self, request, user_id):
+        # Pass through the auth headers, if any, in case the access token
+        # is there.
+        auth_headers = request.requestHeaders.getRawHeaders("Authorization", [])
+        headers = {
+            "Authorization": auth_headers,
+        }
+        result = yield self.http_client.get_json(
+            self.main_uri + request.uri,
+            headers=headers,
+        )
+        defer.returnValue((200, result))
+
+    @defer.inlineCallbacks
+    def on_PUT(self, request, user_id):
+        yield self.auth.get_user_by_req(request)
+        defer.returnValue((200, {}))
+
+
 class KeyUploadServlet(RestServlet):
     PATTERNS = client_v2_patterns("/keys/upload(/(?P<device_id>[^/]+))?$")
 
@@ -136,6 +166,7 @@ def _listen_http(self, listener_config):
                 elif name == "client":
                     resource = JsonResource(self, canonical_json=False)
                     KeyUploadServlet(self).register(resource)
+                    PresenceStatusStubServlet(self).register(resource)
                     resources.update({
                         "/_matrix/client/r0": resource,
                         "/_matrix/client/unstable": resource,

From 637387483348928c368963bf91dab6213ef336bb Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 14 Feb 2018 12:23:16 +0000
Subject: [PATCH 008/278] Move event sending to end in shutdown room admin api

---
 synapse/rest/client/v1/admin.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py
index ddaedb2a8c3..e4036eaa0cb 100644
--- a/synapse/rest/client/v1/admin.py
+++ b/synapse/rest/client/v1/admin.py
@@ -312,17 +312,6 @@ def on_POST(self, request, room_id):
         )
         new_room_id = info["room_id"]
 
-        yield self.event_creation_handler.create_and_send_nonmember_event(
-            room_creator_requester,
-            {
-                "type": "m.room.message",
-                "content": {"body": message, "msgtype": "m.text"},
-                "room_id": new_room_id,
-                "sender": new_room_user_id,
-            },
-            ratelimit=False,
-        )
-
         requester_user_id = requester.user.to_string()
 
         logger.info("Shutting down room %r", room_id)
@@ -360,6 +349,17 @@ def on_POST(self, request, room_id):
 
             kicked_users.append(user_id)
 
+        yield self.event_creation_handler.create_and_send_nonmember_event(
+            room_creator_requester,
+            {
+                "type": "m.room.message",
+                "content": {"body": message, "msgtype": "m.text"},
+                "room_id": new_room_id,
+                "sender": new_room_user_id,
+            },
+            ratelimit=False,
+        )
+
         aliases_for_room = yield self.store.get_aliases_for_room(room_id)
 
         yield self.store.update_aliases_for_room(

From 7a1406d144ca32b83a050e947f423cda8e964cd6 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 14 Feb 2018 12:23:59 +0000
Subject: [PATCH 009/278] Prefill client_ip_last_seen in replication

---
 synapse/replication/slave/storage/client_ips.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/synapse/replication/slave/storage/client_ips.py b/synapse/replication/slave/storage/client_ips.py
index 352c9a2aa80..fedf7a31888 100644
--- a/synapse/replication/slave/storage/client_ips.py
+++ b/synapse/replication/slave/storage/client_ips.py
@@ -42,6 +42,8 @@ def insert_client_ip(self, user_id, access_token, ip, user_agent, device_id):
         if last_seen is not None and (now - last_seen) < LAST_SEEN_GRANULARITY:
             return
 
+        self.client_ip_last_seen.prefill(key, now)
+
         self.hs.get_tcp_replication().send_user_ip(
             user_id, access_token, ip, user_agent, device_id, now
         )
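
The effect of the prefill, in miniature (LAST_SEEN_GRANULARITY and the cache shape come from the surrounding code; the rest is an illustrative sketch): once the worker records `now` for a (user, access_token, ip) key, further hits on that key within the granularity window return early instead of sending another replication command to the master.

    LAST_SEEN_GRANULARITY = 120 * 1000  # ms, as in synapse/storage/client_ips.py

    last_seen_cache = {}

    def insert_client_ip(key, now, send_user_ip):
        last_seen = last_seen_cache.get(key)
        if last_seen is not None and (now - last_seen) < LAST_SEEN_GRANULARITY:
            return  # seen recently: skip the replication round-trip
        last_seen_cache[key] = now  # the prefill added by this patch
        send_user_ip(key, now)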

From 8f8ea91eefcc43c5ac24e85b14a86af4da53e6e0 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 14 Feb 2018 12:24:26 +0000
Subject: [PATCH 010/278] Bump LAST_SEEN_GRANULARITY in client_ips

---
 synapse/storage/client_ips.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py
index 968d2fed223..30d3541cf7c 100644
--- a/synapse/storage/client_ips.py
+++ b/synapse/storage/client_ips.py
@@ -30,7 +30,7 @@
 # Number of msec of granularity to store the user IP 'last seen' time. Smaller
 # times give more inserts into the database even for readonly API hits
 # 120 seconds == 2 minutes
-LAST_SEEN_GRANULARITY = 120 * 1000
+LAST_SEEN_GRANULARITY = 10 * 60 * 1000
 
 
 class ClientIpStore(background_updates.BackgroundUpdateStore):

From fae708c0e8c35930f1172322b7c0e9f0b1b3f9a4 Mon Sep 17 00:00:00 2001
From: hera <matrix@template.upcloud.com>
Date: Thu, 12 Oct 2017 10:41:46 +0000
Subject: [PATCH 011/278] Disable auth on room_members for now

because the moznet bridge is broken (https://github.com/matrix-org/matrix-appservice-irc/issues/506)
---
 synapse/handlers/message.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 7b9946ab910..b194f113cea 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -390,7 +390,7 @@ def get_joined_members(self, requester, room_id):
         # If this is an AS, double check that they are allowed to see the members.
         # This can either be because the AS user is in the room or becuase there
         # is a user in the room that the AS is "interested in"
-        if requester.app_service and user_id not in users_with_profile:
+        if False and requester.app_service and user_id not in users_with_profile:
             for uid in users_with_profile:
                 if requester.app_service.is_interested_in_user(uid):
                     break

From 1031bd25f8f37413f4101b0a049a598f1d95f8d3 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Fri, 24 Nov 2017 00:38:44 +0000
Subject: [PATCH 012/278] Avoid doing presence updates on replication reconnect

Presence is supposed to be disabled on matrix.org, so we shouldn't send a load
of USER_SYNC commands every time the synchrotron reconnects to the master.
---
 synapse/app/synchrotron.py   | 2 ++
 synapse/handlers/presence.py | 1 +
 2 files changed, 3 insertions(+)

diff --git a/synapse/app/synchrotron.py b/synapse/app/synchrotron.py
index 3cade3c68eb..67a2d2b7db3 100644
--- a/synapse/app/synchrotron.py
+++ b/synapse/app/synchrotron.py
@@ -214,6 +214,8 @@ def process_replication_rows(self, token, rows):
         yield self.notify_from_replication(states, stream_id)
 
     def get_currently_syncing_users(self):
+        # presence is disabled on matrix.org, so we return the empty set
+        return set()
         return [
             user_id for user_id, count in iteritems(self.user_to_num_current_syncs)
             if count > 0
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index c7336020a25..95953efb089 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -467,6 +467,7 @@ def get_currently_syncing_users(self):
         Returns:
             set(str): A set of user_id strings.
         """
+        # presence is disabled on matrix.org, so we return the empty set
         return set()
         syncing_user_ids = {
             user_id for user_id, count in self.user_to_num_current_syncs.items()

From e21c312e16b33ebcf8f031757986c601fe0320c4 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 21 Feb 2018 21:09:33 +0000
Subject: [PATCH 013/278] Actually set cache factors in workers

---
 synapse/app/synctl.py | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/synapse/app/synctl.py b/synapse/app/synctl.py
index 56ae0861283..8c525459c7e 100755
--- a/synapse/app/synctl.py
+++ b/synapse/app/synctl.py
@@ -108,7 +108,7 @@ def stop(pidfile, app):
 
 
 Worker = collections.namedtuple("Worker", [
-    "app", "configfile", "pidfile", "cache_factor"
+    "app", "configfile", "pidfile", "cache_factor", "cache_factors",
 ])
 
 
@@ -215,6 +215,10 @@ def main():
                 or pidfile
             )
             worker_cache_factor = worker_config.get("synctl_cache_factor") or cache_factor
+            worker_cache_factors = (
+                worker_config.get("synctl_cache_factors")
+                or cache_factors
+            )
             daemonize = worker_config.get("daemonize") or config.get("daemonize")
             assert daemonize, "Main process must have daemonize set to true"
 
@@ -230,8 +234,10 @@ def main():
             assert worker_daemonize, "In config %r: expected '%s' to be True" % (
                 worker_configfile, "worker_daemonize")
             worker_cache_factor = worker_config.get("synctl_cache_factor")
+            worker_cache_factors = worker_config.get("synctl_cache_factors", {})
         workers.append(Worker(
             worker_app, worker_configfile, worker_pidfile, worker_cache_factor,
+            worker_cache_factors,
         ))
 
     action = options.action
@@ -266,15 +272,19 @@ def main():
             start(configfile)
 
         for worker in workers:
+            env = os.environ.copy()
+
             if worker.cache_factor:
                 os.environ["SYNAPSE_CACHE_FACTOR"] = str(worker.cache_factor)
 
+            for cache_name, factor in worker.cache_factors.iteritems():
+                os.environ["SYNAPSE_CACHE_FACTOR_" + cache_name.upper()] = str(factor)
+
             start_worker(worker.app, configfile, worker.configfile)
 
-            if cache_factor:
-                os.environ["SYNAPSE_CACHE_FACTOR"] = str(cache_factor)
-            else:
-                os.environ.pop("SYNAPSE_CACHE_FACTOR", None)
+            # Reset env back to the original
+            os.environ.clear()
+            os.environ.update(env)
 
 
 if __name__ == "__main__":
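
For illustration, what the per-worker environment handling above amounts to (the cache names and factors below are made-up examples, not a definitive list of Synapse caches):

    import os

    worker_cache_factor = 2.0
    worker_cache_factors = {"get_users_in_room": 5.0, "stateGroupCache": 0.5}

    env = os.environ.copy()
    if worker_cache_factor:
        os.environ["SYNAPSE_CACHE_FACTOR"] = str(worker_cache_factor)
    for cache_name, factor in worker_cache_factors.items():
        # e.g. SYNAPSE_CACHE_FACTOR_GET_USERS_IN_ROOM=5.0
        os.environ["SYNAPSE_CACHE_FACTOR_" + cache_name.upper()] = str(factor)

    # ... start_worker(...) would run here, inheriting the environment ...

    # restore the original environment so the next worker starts clean
    os.environ.clear()
    os.environ.update(env)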

From 0ca2857baab2d241a740a6ba5e8ed36d17756338 Mon Sep 17 00:00:00 2001
From: hera <matrix@template.upcloud.com>
Date: Wed, 28 Feb 2018 00:35:03 +0000
Subject: [PATCH 014/278] increase sync cache to 2 minutes

to give synchrotrons being hammered by repeated initial /syncs more of a
chance to actually complete, avoiding a DoS
---
 synapse/handlers/sync.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 1e8a50514b4..8df66b0fcf7 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -32,6 +32,7 @@
 
 logger = logging.getLogger(__name__)
 
+SYNC_RESPONSE_CACHE_MS = 2 * 60 * 1000
 
 SyncConfig = collections.namedtuple("SyncConfig", [
     "user",
@@ -178,7 +179,9 @@ def __init__(self, hs):
         self.presence_handler = hs.get_presence_handler()
         self.event_sources = hs.get_event_sources()
         self.clock = hs.get_clock()
-        self.response_cache = ResponseCache(hs, "sync")
+        self.response_cache = ResponseCache(
+            hs, "sync", timeout_ms=SYNC_RESPONSE_CACHE_MS,
+        )
         self.state = hs.get_state_handler()
 
     def wait_for_sync_for_user(self, sync_config, since_token=None, timeout=0,

From e18378c3e26a6464c14a61a30d201ecd97b7ef4e Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Wed, 4 Apr 2018 16:14:02 +0100
Subject: [PATCH 015/278] Increase member limiter to 20

Let's see if this makes the bridges go faster, or if it kills the synapse
master.
---
 synapse/handlers/room_member.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index 5e351df8aa4..a5ea2c3ae26 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -68,7 +68,7 @@ def __init__(self, hs):
         self.event_creation_hander = hs.get_event_creation_handler()
 
         self.member_linearizer = Linearizer(name="member")
-        self.member_limiter = Limiter(3)
+        self.member_limiter = Limiter(20)
 
         self.clock = hs.get_clock()
         self.spam_checker = hs.get_spam_checker()

From 80786d5cafe3bfe7637116c2208455384f65e97b Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Wed, 4 Apr 2018 17:30:02 +0100
Subject: [PATCH 016/278] Logging for get_users_in_room

---
 synapse/storage/roommember.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py
index 829cc4a207d..3032f101850 100644
--- a/synapse/storage/roommember.py
+++ b/synapse/storage/roommember.py
@@ -68,6 +68,7 @@ def get_hosts_in_room(self, room_id, cache_context):
         defer.returnValue(hosts)
 
     @cached(max_entries=100000, iterable=True)
+    @defer.inlineCallbacks
     def get_users_in_room(self, room_id):
         def f(txn):
             sql = (
@@ -81,7 +82,14 @@ def f(txn):
 
             txn.execute(sql, (room_id, Membership.JOIN,))
             return [to_ascii(r[0]) for r in txn]
-        return self.runInteraction("get_users_in_room", f)
+        start_time = self._clock.time_msec()
+        result = yield self.runInteraction("get_users_in_room", f)
+        end_time = self._clock.time_msec()
+        logger.info(
+            "Fetched room membership for %s (%i users) in %i ms",
+            room_id, len(result), end_time - start_time,
+        )
+        defer.returnValue(result)
 
     @cached()
     def get_invited_rooms_for_user(self, user_id):

From ce9d0b1d0c0b362194138093004a7bc7ce147cc2 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Wed, 4 Apr 2018 23:20:30 +0100
Subject: [PATCH 017/278] Fix earlier logging patch

`@cached` doesn't work on decorated functions, because it uses inspection on
the target to calculate the number of arguments.
---
 synapse/storage/roommember.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py
index 3032f101850..682c637c048 100644
--- a/synapse/storage/roommember.py
+++ b/synapse/storage/roommember.py
@@ -67,8 +67,7 @@ def get_hosts_in_room(self, room_id, cache_context):
         hosts = frozenset(get_domain_from_id(user_id) for user_id in user_ids)
         defer.returnValue(hosts)
 
-    @cached(max_entries=100000, iterable=True)
-    @defer.inlineCallbacks
+    @cachedInlineCallbacks(max_entries=100000, iterable=True)
     def get_users_in_room(self, room_id):
         def f(txn):
             sql = (
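
A minimal illustration of the problem described above (a sketch of the general mechanism, not Synapse's actual @cached internals): a decorator whose wrapper takes *args/**kwargs hides the real argument list, so inspecting the decorated function to work out how many arguments form the cache key gives the wrong answer.

    import inspect

    def plain_decorator(f):
        def wrapper(*args, **kwargs):  # the signature of f is lost here
            return f(*args, **kwargs)
        return wrapper

    def get_users_in_room(self, room_id):
        pass

    print(inspect.getfullargspec(get_users_in_room).args)
    # ['self', 'room_id'] -> one cache-key argument after self

    print(inspect.getfullargspec(plain_decorator(get_users_in_room)).args)
    # [] -> no named arguments left to derive a cache key from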

From 463e7c27097a4e48315365c6fc6cbe6bfe4493c8 Mon Sep 17 00:00:00 2001
From: hera <matrix@template.upcloud.com>
Date: Tue, 24 Apr 2018 11:30:54 +0000
Subject: [PATCH 018/278] Lower member limiter

---
 synapse/handlers/room_member.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index a5ea2c3ae26..edc33e466a6 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -68,7 +68,7 @@ def __init__(self, hs):
         self.event_creation_hander = hs.get_event_creation_handler()
 
         self.member_linearizer = Linearizer(name="member")
-        self.member_limiter = Limiter(20)
+        self.member_limiter = Limiter(10)
 
         self.clock = hs.get_clock()
         self.spam_checker = hs.get_spam_checker()

From 9e38981ae47d03467a954c3c540c51b567f6e50b Mon Sep 17 00:00:00 2001
From: aphrodite <matrix@aphrodite.matrix.org>
Date: Mon, 4 Jun 2018 14:24:28 +0000
Subject: [PATCH 019/278] Send HTTP pushes direct to http-priv rather than via
 cloudflare

(This is a heinous hack that ought to be made more generic and pushed back to develop)
---
 synapse/push/httppusher.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py
index bf7ff74a1a5..e0ccbfa360f 100644
--- a/synapse/push/httppusher.py
+++ b/synapse/push/httppusher.py
@@ -329,7 +329,12 @@ def dispatch_push(self, event, tweaks, badge):
         if not notification_dict:
             defer.returnValue([])
         try:
-            resp = yield self.http_client.post_json_get_json(self.url, notification_dict)
+            url = self.url.replace(
+                "https://matrix.org/_matrix/push/v1/notify",
+                "http://http-priv.matrix.org/_matrix/push/v1/notify",
+            )
+
+            resp = yield self.http_client.post_json_get_json(url, notification_dict)
         except Exception:
             logger.warn(
                 "Failed to push event %s to %s",

From 7a32fa01013b1faeace51c71cb98e8a981e79b3b Mon Sep 17 00:00:00 2001
From: David Baker <dave@matrix.org>
Date: Tue, 26 Jun 2018 10:52:52 +0100
Subject: [PATCH 020/278] Fix error on deleting users pending deactivation

Use simple_delete instead of simple_delete_one as commented
---
 synapse/storage/registration.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py
index 9c9cf46e7fb..0d18f6d8692 100644
--- a/synapse/storage/registration.py
+++ b/synapse/storage/registration.py
@@ -623,7 +623,9 @@ def del_user_pending_deactivation(self, user_id):
         Removes the given user to the table of users who need to be parted from all the
         rooms they're in, effectively marking that user as fully deactivated.
         """
-        return self._simple_delete_one(
+        # XXX: This should be simple_delete_one but we failed to put a unique index on
+        # the table, so somehow duplicate entries have ended up in it.
+        return self._simple_delete(
             "users_pending_deactivation",
             keyvalues={
                 "user_id": user_id,

From ca21957b8a4942e829050834297afb290338c120 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 2 Jul 2018 13:56:08 +0100
Subject: [PATCH 021/278] Timeout membership requests after 90s

This is a hacky fix to try to stop in-flight requests from building up
---
 synapse/handlers/room_member.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index edc33e466a6..e0950288df2 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -246,8 +246,22 @@ def update_membership(
         if requester.app_service:
             as_id = requester.app_service.id
 
+        then = self.clock.time_msec()
+
         with (yield self.member_limiter.queue(as_id)):
+           diff = self.clock.time_msec() - then
+
+           if diff > 90 * 1000:
+               # haproxy would have timed the request out anyway...
+               raise SynapseError(504, "took to long to process")
+
             with (yield self.member_linearizer.queue(key)):
+                diff = self.clock.time_msec() - then
+
+                if diff > 90 * 1000:
+                    # haproxy would have timed the request out anyway...
+                    raise SynapseError(504, "took to long to process")
+
                 result = yield self._update_membership(
                     requester,
                     target,

From bf425e533ed678c6e22e2d8655263f4ab6d1edc6 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 3 Jul 2018 10:11:09 +0100
Subject: [PATCH 022/278] Fix PEP8

---
 synapse/handlers/room_member.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index e0950288df2..7d509f9e081 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -249,11 +249,11 @@ def update_membership(
         then = self.clock.time_msec()
 
         with (yield self.member_limiter.queue(as_id)):
-           diff = self.clock.time_msec() - then
+            diff = self.clock.time_msec() - then
 
-           if diff > 90 * 1000:
-               # haproxy would have timed the request out anyway...
-               raise SynapseError(504, "took to long to process")
+            if diff > 90 * 1000:
+                # haproxy would have timed the request out anyway...
+                raise SynapseError(504, "took to long to process")
 
             with (yield self.member_linearizer.queue(key)):
                 diff = self.clock.time_msec() - then

From 27eb4c45cda26a6d4d02351a8fa7a3cd07c4ab7f Mon Sep 17 00:00:00 2001
From: hera <matrix@template.upcloud.com>
Date: Mon, 23 Jul 2018 15:16:36 +0000
Subject: [PATCH 023/278] Lower hacky timeout for member limiter

---
 synapse/handlers/room_member.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index 5b911051765..4872796a319 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -251,14 +251,14 @@ def update_membership(
         with (yield self.member_limiter.queue(as_id)):
             diff = self.clock.time_msec() - then
 
-            if diff > 90 * 1000:
+            if diff > 80 * 1000:
                 # haproxy would have timed the request out anyway...
                 raise SynapseError(504, "took to long to process")
 
             with (yield self.member_linearizer.queue(key)):
                 diff = self.clock.time_msec() - then
 
-                if diff > 90 * 1000:
+                if diff > 80 * 1000:
                     # haproxy would have timed the request out anyway...
                     raise SynapseError(504, "took to long to process")
 

From e43eb47c5f73288188677cc53ffffadb3a62a34e Mon Sep 17 00:00:00 2001
From: hera <matrix@template.upcloud.com>
Date: Mon, 23 Jul 2018 15:22:47 +0000
Subject: [PATCH 024/278] Fixup limiter

---
 synapse/handlers/room_member.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index 4872796a319..6184737cd48 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -30,7 +30,7 @@
 from synapse.api.constants import EventTypes, Membership
 from synapse.api.errors import AuthError, Codes, SynapseError
 from synapse.types import RoomID, UserID
-from synapse.util.async import Limiter, Linearizer
+from synapse.util.async import Linearizer
 from synapse.util.distributor import user_joined_room, user_left_room
 
 logger = logging.getLogger(__name__)
@@ -66,7 +66,7 @@ def __init__(self, hs):
         self.event_creation_hander = hs.get_event_creation_handler()
 
         self.member_linearizer = Linearizer(name="member")
-        self.member_limiter = Limiter(10)
+        self.member_limiter = Linearizer(max_count=10, name="member_as_limiter")
 
         self.clock = hs.get_clock()
         self.spam_checker = hs.get_spam_checker()

From 6e15b5debe3014ff513233a5faa1ad052ad921c2 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Wed, 26 Sep 2018 13:25:52 +0100
Subject: [PATCH 025/278] Revert "Actually set cache factors in workers"

This reverts commit e21c312e16b33ebcf8f031757986c601fe0320c4.
---
 synapse/app/synctl.py | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/synapse/app/synctl.py b/synapse/app/synctl.py
index 356e5cb6a72..d658f967ba8 100755
--- a/synapse/app/synctl.py
+++ b/synapse/app/synctl.py
@@ -111,7 +111,7 @@ def stop(pidfile, app):
 
 
 Worker = collections.namedtuple("Worker", [
-    "app", "configfile", "pidfile", "cache_factor", "cache_factors",
+    "app", "configfile", "pidfile", "cache_factor"
 ])
 
 
@@ -218,10 +218,6 @@ def main():
                 or pidfile
             )
             worker_cache_factor = worker_config.get("synctl_cache_factor") or cache_factor
-            worker_cache_factors = (
-                worker_config.get("synctl_cache_factors")
-                or cache_factors
-            )
             daemonize = worker_config.get("daemonize") or config.get("daemonize")
             assert daemonize, "Main process must have daemonize set to true"
 
@@ -237,10 +233,8 @@ def main():
             assert worker_daemonize, "In config %r: expected '%s' to be True" % (
                 worker_configfile, "worker_daemonize")
             worker_cache_factor = worker_config.get("synctl_cache_factor")
-            worker_cache_factors = worker_config.get("synctl_cache_factors", {})
         workers.append(Worker(
             worker_app, worker_configfile, worker_pidfile, worker_cache_factor,
-            worker_cache_factors,
         ))
 
     action = options.action
@@ -275,19 +269,15 @@ def main():
             start(configfile)
 
         for worker in workers:
-            env = os.environ.copy()
-
             if worker.cache_factor:
                 os.environ["SYNAPSE_CACHE_FACTOR"] = str(worker.cache_factor)
 
-            for cache_name, factor in worker.cache_factors.iteritems():
-                os.environ["SYNAPSE_CACHE_FACTOR_" + cache_name.upper()] = str(factor)
-
             start_worker(worker.app, configfile, worker.configfile)
 
-            # Reset env back to the original
-            os.environ.clear()
-            os.environ.update(env)
+            if cache_factor:
+                os.environ["SYNAPSE_CACHE_FACTOR"] = str(cache_factor)
+            else:
+                os.environ.pop("SYNAPSE_CACHE_FACTOR", None)
 
 
 if __name__ == "__main__":

From 5c0c4b4079b43e6929cca19de63b580cb6863f06 Mon Sep 17 00:00:00 2001
From: hera <matrix@hera.matrix.org>
Date: Thu, 8 Nov 2018 11:03:08 +0000
Subject: [PATCH 026/278] Fix encoding error for consent form on python3

The form was rendering this as "b'01234....'".

-- richvdh
---
 synapse/rest/consent/consent_resource.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/synapse/rest/consent/consent_resource.py b/synapse/rest/consent/consent_resource.py
index e0f7de5d5c4..8009b7ff1c9 100644
--- a/synapse/rest/consent/consent_resource.py
+++ b/synapse/rest/consent/consent_resource.py
@@ -160,7 +160,9 @@ def _async_render_GET(self, request):
         try:
             self._render_template(
                 request, "%s.html" % (version,),
-                user=username, userhmac=userhmac, version=version,
+                user=username,
+                userhmac=userhmac.decode('ascii'),
+                version=version,
                 has_consented=has_consented, public_version=public_version,
             )
         except TemplateNotFound:
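
The underlying Python 3 issue, in miniature (the key and user ID below are placeholders): the userhmac value is handled as bytes, and interpolating bytes into a text template renders its repr, hence the stray b'...' in the form.

    import hashlib
    import hmac

    userhmac = hmac.new(
        b"form-secret", b"@user:example.com", hashlib.sha256
    ).hexdigest().encode("ascii")

    print("u=%s&h=%s" % ("@user:example.com", userhmac))
    # Python 3: u=@user:example.com&h=b'...'  <- bytes repr leaks into the page
    print("u=%s&h=%s" % ("@user:example.com", userhmac.decode("ascii")))
    # u=@user:example.com&h=...               <- decoded, as the fix above does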

From 9accd63a387013081ef0035b919bd2c32910c752 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Fri, 30 Nov 2018 12:04:38 +0000
Subject: [PATCH 027/278] Initial patch from Erik

---
 synapse/rest/client/v1/room.py   | 25 ++++++++++++++++++++
 synapse/storage/events_worker.py | 40 ++++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+)

diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py
index fcfe7857f6a..218dbb93ff6 100644
--- a/synapse/rest/client/v1/room.py
+++ b/synapse/rest/client/v1/room.py
@@ -825,6 +825,30 @@ def on_GET(self, request):
         defer.returnValue((200, {"joined_rooms": list(room_ids)}))
 
 
+class TimestampLookupRestServlet(ClientV1RestServlet):
+    PATTERNS = client_path_patterns("/rooms/(?P<room_id>[^/]*)/timestamp_to_event$")
+
+    def __init__(self, hs):
+        super(TimestampLookupRestServlet, self).__init__(hs)
+        self.store = hs.get_datastore()
+
+    @defer.inlineCallbacks
+    def on_GET(self, request, room_id):
+        requester = yield self.auth.get_user_by_req(request)
+        yield self.auth.check_joined_room(room_id, requester.user.to_string())
+
+        timestamp = parse_integer(request, "ts")
+        thread_id = parse_integer(request, "thread_id", 0)
+
+        event_id = yield self.store.get_event_for_timestamp(
+            room_id, thread_id, timestamp,
+        )
+
+        defer.returnValue((200, {
+            "event_id": event_id,
+        }))
+
+
 def register_txn_path(servlet, regex_string, http_server, with_get=False):
     """Registers a transaction-based path.
 
@@ -874,6 +898,7 @@ def register_servlets(hs, http_server):
     JoinedRoomsRestServlet(hs).register(http_server)
     RoomEventServlet(hs).register(http_server)
     RoomEventContextServlet(hs).register(http_server)
+    TimestampLookupRestServlet(hs).register(http_server)
 
 
 def register_deprecated_servlets(hs, http_server):
diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py
index a8326f5296a..e4d8562396b 100644
--- a/synapse/storage/events_worker.py
+++ b/synapse/storage/events_worker.py
@@ -526,3 +526,43 @@ def f(txn):
             return res
 
         return self.runInteraction("get_rejection_reasons", f)
+
+    def get_event_for_timestamp(self, room_id, thread_id, timestamp):
+        sql_template = """
+            SELECT event_id, origin_server_ts FROM events
+            WHERE
+                origin_server_ts %s ?
+                AND room_id = ?
+                AND thread_id = ?
+            ORDER BY origin_server_ts
+            LIMIT 1;
+        """
+
+        def f(txn):
+            txn.execute(sql_template % ("<=",), (timestamp, room_id, thread_id))
+            row = txn.fetchone()
+            if row:
+                event_id_before, ts_before = row
+            else:
+                event_id_before, ts_before = None, None
+
+            txn.execute(sql_template % (">=",), (timestamp, room_id, thread_id))
+            row = txn.fetchone()
+            if row:
+                event_id_after, ts_after = row
+            else:
+                event_id_after, ts_after = None, None
+
+            if event_id_before and event_id_before:
+                # Return the closest one
+                if (timestamp - ts_before) < (ts_after - timestamp):
+                    return event_id_before
+                else:
+                    return event_id_after
+
+            if event_id_before:
+                return event_id_before
+
+            return event_id_after
+
+        return self.runInteraction("get_event_for_timestamp", f)

From 080025e5331d4d51943590800adb9c6c5257a047 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Fri, 30 Nov 2018 12:09:33 +0000
Subject: [PATCH 028/278] Fix buglet and remove thread_id stuff

---
 synapse/rest/client/v1/room.py   | 3 +--
 synapse/storage/events_worker.py | 9 ++++-----
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py
index 218dbb93ff6..f296257b11a 100644
--- a/synapse/rest/client/v1/room.py
+++ b/synapse/rest/client/v1/room.py
@@ -838,10 +838,9 @@ def on_GET(self, request, room_id):
         yield self.auth.check_joined_room(room_id, requester.user.to_string())
 
         timestamp = parse_integer(request, "ts")
-        thread_id = parse_integer(request, "thread_id", 0)
 
         event_id = yield self.store.get_event_for_timestamp(
-            room_id, thread_id, timestamp,
+            room_id, timestamp,
         )
 
         defer.returnValue((200, {
diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py
index e4d8562396b..c5fee97ac9f 100644
--- a/synapse/storage/events_worker.py
+++ b/synapse/storage/events_worker.py
@@ -527,33 +527,32 @@ def f(txn):
 
         return self.runInteraction("get_rejection_reasons", f)
 
-    def get_event_for_timestamp(self, room_id, thread_id, timestamp):
+    def get_event_for_timestamp(self, room_id, timestamp):
         sql_template = """
             SELECT event_id, origin_server_ts FROM events
             WHERE
                 origin_server_ts %s ?
                 AND room_id = ?
-                AND thread_id = ?
             ORDER BY origin_server_ts
             LIMIT 1;
         """
 
         def f(txn):
-            txn.execute(sql_template % ("<=",), (timestamp, room_id, thread_id))
+            txn.execute(sql_template % ("<=",), (timestamp, room_id))
             row = txn.fetchone()
             if row:
                 event_id_before, ts_before = row
             else:
                 event_id_before, ts_before = None, None
 
-            txn.execute(sql_template % (">=",), (timestamp, room_id, thread_id))
+            txn.execute(sql_template % (">=",), (timestamp, room_id))
             row = txn.fetchone()
             if row:
                 event_id_after, ts_after = row
             else:
                 event_id_after, ts_after = None, None
 
-            if event_id_before and event_id_before:
+            if event_id_before and event_id_after:
                 # Return the closest one
                 if (timestamp - ts_before) < (ts_after - timestamp):
                     return event_id_before

From c4074e4ab67e53705029657147e847ee08a429b5 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Mon, 3 Dec 2018 10:15:39 +0000
Subject: [PATCH 029/278] Revert "Merge branch 'rav/timestamp_patch' into
 matrix-org-hotfixes"

This reverts commit 7960e814e5386b32821ef51915e609091e6e1569, reversing
changes made to 3dd704ee9af836b9b3e50daf1017a3d65c9ebaec.

We no longer need this; please redo it as a proper MSC & synapse PR if you want
to keep it...
---
 synapse/rest/client/v1/room.py   | 24 --------------------
 synapse/storage/events_worker.py | 39 --------------------------------
 2 files changed, 63 deletions(-)

diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py
index f296257b11a..fcfe7857f6a 100644
--- a/synapse/rest/client/v1/room.py
+++ b/synapse/rest/client/v1/room.py
@@ -825,29 +825,6 @@ def on_GET(self, request):
         defer.returnValue((200, {"joined_rooms": list(room_ids)}))
 
 
-class TimestampLookupRestServlet(ClientV1RestServlet):
-    PATTERNS = client_path_patterns("/rooms/(?P<room_id>[^/]*)/timestamp_to_event$")
-
-    def __init__(self, hs):
-        super(TimestampLookupRestServlet, self).__init__(hs)
-        self.store = hs.get_datastore()
-
-    @defer.inlineCallbacks
-    def on_GET(self, request, room_id):
-        requester = yield self.auth.get_user_by_req(request)
-        yield self.auth.check_joined_room(room_id, requester.user.to_string())
-
-        timestamp = parse_integer(request, "ts")
-
-        event_id = yield self.store.get_event_for_timestamp(
-            room_id, timestamp,
-        )
-
-        defer.returnValue((200, {
-            "event_id": event_id,
-        }))
-
-
 def register_txn_path(servlet, regex_string, http_server, with_get=False):
     """Registers a transaction-based path.
 
@@ -897,7 +874,6 @@ def register_servlets(hs, http_server):
     JoinedRoomsRestServlet(hs).register(http_server)
     RoomEventServlet(hs).register(http_server)
     RoomEventContextServlet(hs).register(http_server)
-    TimestampLookupRestServlet(hs).register(http_server)
 
 
 def register_deprecated_servlets(hs, http_server):
diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py
index c5fee97ac9f..a8326f5296a 100644
--- a/synapse/storage/events_worker.py
+++ b/synapse/storage/events_worker.py
@@ -526,42 +526,3 @@ def f(txn):
             return res
 
         return self.runInteraction("get_rejection_reasons", f)
-
-    def get_event_for_timestamp(self, room_id, timestamp):
-        sql_template = """
-            SELECT event_id, origin_server_ts FROM events
-            WHERE
-                origin_server_ts %s ?
-                AND room_id = ?
-            ORDER BY origin_server_ts
-            LIMIT 1;
-        """
-
-        def f(txn):
-            txn.execute(sql_template % ("<=",), (timestamp, room_id))
-            row = txn.fetchone()
-            if row:
-                event_id_before, ts_before = row
-            else:
-                event_id_before, ts_before = None, None
-
-            txn.execute(sql_template % (">=",), (timestamp, room_id))
-            row = txn.fetchone()
-            if row:
-                event_id_after, ts_after = row
-            else:
-                event_id_after, ts_after = None, None
-
-            if event_id_before and event_id_after:
-                # Return the closest one
-                if (timestamp - ts_before) < (ts_after - timestamp):
-                    return event_id_before
-                else:
-                    return event_id_after
-
-            if event_id_before:
-                return event_id_before
-
-            return event_id_after
-
-        return self.runInteraction("get_event_for_timestamp", f)

From 046d731fbdf0f7058395fc0c691c9029c1fcb4c9 Mon Sep 17 00:00:00 2001
From: Matthew Hodgson <matthew@matrix.org>
Date: Tue, 15 Jan 2019 21:07:12 +0000
Subject: [PATCH 030/278] limit remote device lists to 1000 entries per user

---
 synapse/handlers/device.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 9e017116a90..6f80a7dce97 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -532,6 +532,20 @@ def _handle_device_updates(self, user_id):
 
                 stream_id = result["stream_id"]
                 devices = result["devices"]
+
+                # Emergency hack to prevent DoS from
+                # @bot:oliviervandertoorn.nl and @bot:matrix-beta.igalia.com
+                # on Jan 15 2019: only store the most recent 1000 devices for
+                # a given user.  (We assume we receive them in chronological
+                # order, which is dubious given _get_e2e_device_keys_txn does
+                # not explicitly order its results).  Otherwise it can take
+                # longer than 60s to persist the >100K devices, at which point
+                # the internal replication request to handle the
+                # m.device_list_update EDU times out, causing the remote
+                # server to retry the transaction and thus DoS synapse master
+                # CPU and DB.
+                devices = devices[-1000:]
+
                 yield self.store.update_remote_device_list_cache(
                     user_id, devices, stream_id,
                 )

From 482d06774ac456943fb7e519a78431c82da305ca Mon Sep 17 00:00:00 2001
From: Matthew Hodgson <matthew@matrix.org>
Date: Tue, 15 Jan 2019 21:38:07 +0000
Subject: [PATCH 031/278] don't store remote device lists if they have more
 than 10K devices

---
 synapse/handlers/device.py | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 6f80a7dce97..5bca62418ef 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -533,18 +533,19 @@ def _handle_device_updates(self, user_id):
                 stream_id = result["stream_id"]
                 devices = result["devices"]
 
-                # Emergency hack to prevent DoS from
-                # @bot:oliviervandertoorn.nl and @bot:matrix-beta.igalia.com
-                # on Jan 15 2019: only store the most recent 1000 devices for
-                # a given user.  (We assume we receive them in chronological
-                # order, which is dubious given _get_e2e_device_keys_txn does
-                # not explicitly order its results).  Otherwise it can take
-                # longer than 60s to persist the >100K devices, at which point
-                # the internal replication request to handle the
-                # m.device_list_update EDU times out, causing the remote
-                # server to retry the transaction and thus DoS synapse master
-                # CPU and DB.
-                devices = devices[-1000:]
+                # If the remote server has more than ~10000 devices for this user
+                # we assume that something is going horribly wrong (e.g. a bot
+                # that logs in and creates a new device every time it tries to
+                # send a message).  Maintaining lots of devices per user in the
+                # cache can cause serious performance issues as if this request
+                # takes more than 60s to complete, internal replication from the
+                # inbound federation worker to the synapse master may time out
+                # causing the inbound federation to fail and causing the remote
+                # server to retry, causing a DoS.  So in this scenario we give
+                # up on storing the total list of devices and only handle the
+                # delta instead.
+                if len(devices) > 10000:
+                    devices = []
 
                 yield self.store.update_remote_device_list_cache(
                     user_id, devices, stream_id,

From b4796d18149948fed4b864b61f655ecfe068d4b3 Mon Sep 17 00:00:00 2001
From: Matthew Hodgson <matthew@matrix.org>
Date: Tue, 15 Jan 2019 21:46:29 +0000
Subject: [PATCH 032/278] drop the limit to 1K as e2e will be hosed beyond that
 point anyway

---
 synapse/handlers/device.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 5bca62418ef..a93dfd1d631 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -533,7 +533,7 @@ def _handle_device_updates(self, user_id):
                 stream_id = result["stream_id"]
                 devices = result["devices"]
 
-                # If the remote server has more than ~10000 devices for this user
+                # If the remote server has more than ~1000 devices for this user
                 # we assume that something is going horribly wrong (e.g. a bot
                 # that logs in and creates a new device every time it tries to
                 # send a message).  Maintaining lots of devices per user in the
@@ -544,7 +544,7 @@ def _handle_device_updates(self, user_id):
                 # server to retry, causing a DoS.  So in this scenario we give
                 # up on storing the total list of devices and only handle the
                 # delta instead.
-                if len(devices) > 10000:
+                if len(devices) > 1000:
                     devices = []
 
                 yield self.store.update_remote_device_list_cache(

From 5cb15c0443c6258056b9dcaa8c28bcce5fbb312e Mon Sep 17 00:00:00 2001
From: Matthew Hodgson <matthew@matrix.org>
Date: Tue, 15 Jan 2019 22:10:44 +0000
Subject: [PATCH 033/278] warn if we ignore device lists

---
 synapse/handlers/device.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index a93dfd1d631..8955cde4ed3 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -545,6 +545,10 @@ def _handle_device_updates(self, user_id):
                 # up on storing the total list of devices and only handle the
                 # delta instead.
                 if len(devices) > 1000:
+                    logger.warn(
+                        "Ignoring device list snapshot for %s as it has >1K devs (%d)",
+                        user_id, len(devices)
+                    )
                     devices = []
 
                 yield self.store.update_remote_device_list_cache(

From ef7865e2f2a8cd353a332244e81de8db0370b6d2 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <andrew@amorgan.xyz>
Date: Wed, 23 Jan 2019 14:17:21 +0000
Subject: [PATCH 034/278] Track user_dir current event stream position

---
 synapse/handlers/user_directory.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py
index 3c409993383..55e03325491 100644
--- a/synapse/handlers/user_directory.py
+++ b/synapse/handlers/user_directory.py
@@ -19,6 +19,7 @@
 
 from twisted.internet import defer
 
+from prometheus_client import Gauge
 from synapse.api.constants import EventTypes, JoinRules, Membership
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.storage.roommember import ProfileInfo
@@ -27,6 +28,8 @@
 
 logger = logging.getLogger(__name__)
 
+# Expose event stream processing position
+event_processing_position = Gauge("event_stream_processing_position", "Currently processed up to position in the event stream")
 
 class UserDirectoryHandler(object):
     """Handles querying of and keeping updated the user_directory.
@@ -163,6 +166,10 @@ def _unsafe_process(self):
                 yield self._handle_deltas(deltas)
 
                 self.pos = deltas[-1]["stream_id"]
+
+                # Expose current event processing position to prometheus
+                event_processing_position.set(self.pos)
+
                 yield self.store.update_user_directory_stream_pos(self.pos)
 
     @defer.inlineCallbacks

From 77dfe51aba3190a146556cff8a32a66a607a5ff9 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <andrew@amorgan.xyz>
Date: Wed, 23 Jan 2019 14:21:19 +0000
Subject: [PATCH 035/278] Name metric consistently

---
 synapse/handlers/user_directory.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py
index 55e03325491..6df58060da9 100644
--- a/synapse/handlers/user_directory.py
+++ b/synapse/handlers/user_directory.py
@@ -29,7 +29,7 @@
 logger = logging.getLogger(__name__)
 
 # Expose event stream processing position
-event_processing_position = Gauge("event_stream_processing_position", "Currently processed up to position in the event stream")
+event_processing_position = Gauge("synapse_user_dir_event_stream_position", "Currently processed up to position in the event stream")
 
 class UserDirectoryHandler(object):
     """Handles querying of and keeping updated the user_directory.

From 7da659dd6d1cadac233855d0b5a2269ba0ac280b Mon Sep 17 00:00:00 2001
From: Andrew Morgan <andrew@amorgan.xyz>
Date: Wed, 23 Jan 2019 15:01:09 +0000
Subject: [PATCH 036/278] Use existing stream position counter metric

---
 synapse/handlers/user_directory.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py
index 6df58060da9..197a826daa9 100644
--- a/synapse/handlers/user_directory.py
+++ b/synapse/handlers/user_directory.py
@@ -19,7 +19,6 @@
 
 from twisted.internet import defer
 
-from prometheus_client import Gauge
 from synapse.api.constants import EventTypes, JoinRules, Membership
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.storage.roommember import ProfileInfo
@@ -28,9 +27,6 @@
 
 logger = logging.getLogger(__name__)
 
-# Expose event stream processing position
-event_processing_position = Gauge("synapse_user_dir_event_stream_position", "Currently processed up to position in the event stream")
-
 class UserDirectoryHandler(object):
     """Handles querying of and keeping updated the user_directory.
 
@@ -168,7 +164,8 @@ def _unsafe_process(self):
                 self.pos = deltas[-1]["stream_id"]
 
                 # Expose current event processing position to prometheus
-                event_processing_position.set(self.pos)
+                synapse.metrics.event_processing_positions.labels(
+                    "user_dir").set(self.pos)
 
                 yield self.store.update_user_directory_stream_pos(self.pos)
 

From 4777836b832d715cb2b49879ca93868cdf1ab727 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <andrew@amorgan.xyz>
Date: Wed, 23 Jan 2019 15:26:03 +0000
Subject: [PATCH 037/278] Fix missing synapse metrics import

---
 synapse/handlers/user_directory.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py
index 197a826daa9..a9f062df4b2 100644
--- a/synapse/handlers/user_directory.py
+++ b/synapse/handlers/user_directory.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 import logging
+import synapse.metrics
 
 from six import iteritems
 

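Patches 034-037 converge on reusing a single labelled stream-position metric
rather than defining a new gauge per handler. A rough sketch of that pattern
with prometheus_client (the metric name and label here are illustrative, not
the exact definitions in synapse.metrics):

    from prometheus_client import Gauge

    # One gauge, labelled by whichever handler is consuming the event stream.
    event_processing_positions = Gauge(
        "synapse_event_processing_positions",
        "Current event stream position processed, per handler",
        ["name"],
    )

    def record_position(handler_name, stream_id):
        # e.g. record_position("user_dir", deltas[-1]["stream_id"])
        event_processing_positions.labels(handler_name).set(stream_id)
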
From decb5698b3dabd0c42ec4ce085bb4bb580de8f3a Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Thu, 31 Jan 2019 00:23:58 +0000
Subject: [PATCH 038/278] Break infinite loop on redaction in v3 rooms

---
 synapse/storage/events_worker.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py
index 57dae324c70..dc09a850873 100644
--- a/synapse/storage/events_worker.py
+++ b/synapse/storage/events_worker.py
@@ -177,7 +177,7 @@ def _get_events(self, event_ids, check_redacted=True,
             # Starting in room version v3, some redactions need to be rechecked if we
             # didn't have the redacted event at the time, so we recheck on read
             # instead.
-            if not allow_rejected and entry.event.type == EventTypes.Redaction:
+            if not allow_rejected and entry.event.type == EventTypes.Redaction and False:
                 if entry.event.internal_metadata.need_to_check_redaction():
                     orig = yield self.get_event(
                         entry.event.redacts,

From e97c1df30c23eaa3ee96ea7b761eb5107af35e6a Mon Sep 17 00:00:00 2001
From: Amber Brown <hawkowl@atleastfornow.net>
Date: Thu, 31 Jan 2019 13:26:38 +0000
Subject: [PATCH 039/278] remove slow code on userdir (#4534)

---
 synapse/handlers/user_directory.py | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py
index 71ea2e3ceec..f5c3ba23a61 100644
--- a/synapse/handlers/user_directory.py
+++ b/synapse/handlers/user_directory.py
@@ -338,24 +338,6 @@ def _handle_deltas(self, deltas):
                     public_value=Membership.JOIN,
                 )
 
-                if change is False:
-                    # Need to check if the server left the room entirely, if so
-                    # we might need to remove all the users in that room
-                    is_in_room = yield self.store.is_host_joined(
-                        room_id, self.server_name,
-                    )
-                    if not is_in_room:
-                        logger.info("Server left room: %r", room_id)
-                        # Fetch all the users that we marked as being in user
-                        # directory due to being in the room and then check if
-                        # need to remove those users or not
-                        user_ids = yield self.store.get_users_in_dir_due_to_room(room_id)
-                        for user_id in user_ids:
-                            yield self._handle_remove_user(room_id, user_id)
-                        return
-                    else:
-                        logger.debug("Server is still in room: %r", room_id)
-
                 is_support = yield self.store.is_support_user(state_key)
                 if not is_support:
                     if change is None:

From 6c232a69dfa23dff154e49918b3133669e28d062 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Thu, 31 Jan 2019 18:43:49 +0000
Subject: [PATCH 040/278] Revert "Break infinite loop on redaction in v3 rooms"

We've got a better fix for this now.

This reverts commit decb5698b3dabd0c42ec4ce085bb4bb580de8f3a.
---
 synapse/storage/events_worker.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py
index dc09a850873..57dae324c70 100644
--- a/synapse/storage/events_worker.py
+++ b/synapse/storage/events_worker.py
@@ -177,7 +177,7 @@ def _get_events(self, event_ids, check_redacted=True,
             # Starting in room version v3, some redactions need to be rechecked if we
             # didn't have the redacted event at the time, so we recheck on read
             # instead.
-            if not allow_rejected and entry.event.type == EventTypes.Redaction and False:
+            if not allow_rejected and entry.event.type == EventTypes.Redaction:
                 if entry.event.internal_metadata.need_to_check_redaction():
                     orig = yield self.get_event(
                         entry.event.redacts,

From f5bafd70f4d641b24303958b650efcf7dc18ab92 Mon Sep 17 00:00:00 2001
From: Amber Brown <hawkowl@atleastfornow.net>
Date: Sat, 16 Feb 2019 04:34:23 +1100
Subject: [PATCH 041/278] add cache remover endpoint and wire it up

---
 synapse/replication/http/__init__.py     |  3 ++-
 synapse/rest/client/v2_alpha/register.py | 11 +++++++++++
 synapse/storage/registration.py          | 11 +++++++----
 3 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/synapse/replication/http/__init__.py b/synapse/replication/http/__init__.py
index 19f214281ef..f46a580be28 100644
--- a/synapse/replication/http/__init__.py
+++ b/synapse/replication/http/__init__.py
@@ -14,7 +14,7 @@
 # limitations under the License.
 
 from synapse.http.server import JsonResource
-from synapse.replication.http import federation, membership, send_event
+from synapse.replication.http import federation, membership, send_event, registration
 
 REPLICATION_PREFIX = "/_synapse/replication"
 
@@ -28,3 +28,4 @@ def register_servlets(self, hs):
         send_event.register_servlets(hs, self)
         membership.register_servlets(hs, self)
         federation.register_servlets(hs, self)
+        registration.register_servlets(hs, self)
diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py
index 7f812b82093..35319f7a796 100644
--- a/synapse/rest/client/v2_alpha/register.py
+++ b/synapse/rest/client/v2_alpha/register.py
@@ -24,6 +24,7 @@
 
 import synapse
 import synapse.types
+from synapse.replication.http.registration import RegistrationUserCacheInvalidationServlet
 from synapse.api.constants import LoginType
 from synapse.api.errors import Codes, SynapseError, UnrecognizedRequestError
 from synapse.config.server import is_threepid_reserved
@@ -193,6 +194,10 @@ def __init__(self, hs):
         self.device_handler = hs.get_device_handler()
         self.macaroon_gen = hs.get_macaroon_generator()
 
+        self._invalidate_caches_client = (
+            RegistrationUserCacheInvalidationServlet.make_client(hs)
+        )
+
     @interactive_auth_handler
     @defer.inlineCallbacks
     def on_POST(self, request):
@@ -266,6 +271,9 @@ def on_POST(self, request):
 
         # == Shared Secret Registration == (e.g. create new user scripts)
         if 'mac' in body:
+            if self.hs.config.worker_app:
+                raise SynapseError(403, "Not available at this endpoint")
+
             # FIXME: Should we really be determining if this is shared secret
             # auth based purely on the 'mac' key?
             result = yield self._do_shared_secret_registration(
@@ -456,6 +464,9 @@ def on_POST(self, request):
             )
             yield self.registration_handler.post_consent_actions(registered_user_id)
 
+        if self.hs.config.worker_app:
+            self._invalidate_caches_client(registered_user_id)
+
         defer.returnValue((200, return_dict))
 
     def on_OPTIONS(self, _):
diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py
index c9e11c31359..8b4554f6af4 100644
--- a/synapse/storage/registration.py
+++ b/synapse/storage/registration.py
@@ -146,6 +146,7 @@ class RegistrationStore(RegistrationWorkerStore,
     def __init__(self, db_conn, hs):
         super(RegistrationStore, self).__init__(db_conn, hs)
 
+        self.hs = hs
         self.clock = hs.get_clock()
 
         self.register_background_index_update(
@@ -321,10 +322,12 @@ def _register(
                 (user_id_obj.localpart, create_profile_with_displayname)
             )
 
-        self._invalidate_cache_and_stream(
-            txn, self.get_user_by_id, (user_id,)
-        )
-        txn.call_after(self.is_guest.invalidate, (user_id,))
+        # Don't invalidate here, it will be done through replication to the worker.
+        if not self.hs.config.worker_app:
+            self._invalidate_cache_and_stream(
+                txn, self.get_user_by_id, (user_id,)
+            )
+            txn.call_after(self.is_guest.invalidate, (user_id,))
 
     def get_users_by_id_case_insensitive(self, user_id):
         """Gets users that match user_id case insensitively.

From 7b5c04312efd0829855a45b19839971c55f81599 Mon Sep 17 00:00:00 2001
From: Amber Brown <hawkowl@atleastfornow.net>
Date: Sat, 16 Feb 2019 04:35:27 +1100
Subject: [PATCH 042/278] isort

---
 synapse/handlers/user_directory.py       | 1 -
 synapse/replication/http/__init__.py     | 2 +-
 synapse/rest/client/v2_alpha/register.py | 4 +++-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py
index f5c3ba23a61..0dacd9e357e 100644
--- a/synapse/handlers/user_directory.py
+++ b/synapse/handlers/user_directory.py
@@ -14,7 +14,6 @@
 # limitations under the License.
 
 import logging
-import synapse.metrics
 
 from six import iteritems
 
diff --git a/synapse/replication/http/__init__.py b/synapse/replication/http/__init__.py
index f46a580be28..3807d2ac6f9 100644
--- a/synapse/replication/http/__init__.py
+++ b/synapse/replication/http/__init__.py
@@ -14,7 +14,7 @@
 # limitations under the License.
 
 from synapse.http.server import JsonResource
-from synapse.replication.http import federation, membership, send_event, registration
+from synapse.replication.http import federation, membership, registration, send_event
 
 REPLICATION_PREFIX = "/_synapse/replication"
 
diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py
index 35319f7a796..ca9a850817a 100644
--- a/synapse/rest/client/v2_alpha/register.py
+++ b/synapse/rest/client/v2_alpha/register.py
@@ -24,7 +24,6 @@
 
 import synapse
 import synapse.types
-from synapse.replication.http.registration import RegistrationUserCacheInvalidationServlet
 from synapse.api.constants import LoginType
 from synapse.api.errors import Codes, SynapseError, UnrecognizedRequestError
 from synapse.config.server import is_threepid_reserved
@@ -34,6 +33,9 @@
     parse_json_object_from_request,
     parse_string,
 )
+from synapse.replication.http.registration import (
+    RegistrationUserCacheInvalidationServlet,
+)
 from synapse.util.msisdn import phone_number_to_msisdn
 from synapse.util.ratelimitutils import FederationRateLimiter
 from synapse.util.threepids import check_3pid_allowed

From ce5f3b1ba5cefe5d453bd82c06faa8eaa336137f Mon Sep 17 00:00:00 2001
From: Amber Brown <hawkowl@atleastfornow.net>
Date: Sat, 16 Feb 2019 04:35:58 +1100
Subject: [PATCH 043/278] add the registration replication servlet file

---
 synapse/replication/http/registration.py | 70 ++++++++++++++++++++++++
 1 file changed, 70 insertions(+)
 create mode 100644 synapse/replication/http/registration.py

diff --git a/synapse/replication/http/registration.py b/synapse/replication/http/registration.py
new file mode 100644
index 00000000000..5030bde4990
--- /dev/null
+++ b/synapse/replication/http/registration.py
@@ -0,0 +1,70 @@
+# -*- coding: utf-8 -*-
+# Copyright 2018 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+from twisted.internet import defer
+
+from synapse.events import event_type_from_format_version
+from synapse.events.snapshot import EventContext
+from synapse.http.servlet import parse_json_object_from_request
+from synapse.replication.http._base import ReplicationEndpoint
+from synapse.types import Requester, UserID
+from synapse.util.metrics import Measure
+
+logger = logging.getLogger(__name__)
+
+
+class RegistrationUserCacheInvalidationServlet(ReplicationEndpoint):
+    """
+    Invalidate the caches that a registration usually invalidates.
+
+    Request format:
+
+        POST /_synapse/replication/fed_query/:fed_cleanup_room/:txn_id
+
+        {}
+    """
+
+    NAME = "reg_invalidate_user_caches"
+    PATH_ARGS = ("user_id",)
+
+    def __init__(self, hs):
+        super(RegistrationUserCacheInvalidationServlet, self).__init__(hs)
+        self.store = hs.get_datastore()
+
+    @staticmethod
+    def _serialize_payload(user_id, args):
+        """
+        Args:
+            user_id (str)
+        """
+        return {}
+
+    @defer.inlineCallbacks
+    def _handle_request(self, request, user_id):
+
+        def invalidate(txn):
+            self.store._invalidate_cache_and_stream(
+                txn, self.store.get_user_by_id, (user_id,)
+            )
+            txn.call_after(self.store.is_guest.invalidate, (user_id,))
+
+        yield self.store.runInteraction("user_invalidate_caches", invalidate)
+        defer.returnValue((200, {}))
+
+
+def register_servlets(hs, http_server):
+    RegistrationUserCacheInvalidationServlet(hs).register(http_server)
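
The servlet above follows the ReplicationEndpoint pattern used elsewhere in the
series: workers obtain a client via make_client and call it, while the process
that owns the caches runs _handle_request. A sketch of the worker-side wiring,
invoking the client the same way the register servlet does in these patches
(illustrative only, not part of any commit here):

    from twisted.internet import defer

    from synapse.replication.http.registration import (
        RegistrationUserCacheInvalidationServlet,
    )

    @defer.inlineCallbacks
    def invalidate_after_register(hs, user_id):
        # On a worker, replicate the cache invalidation to the process that
        # owns the registration caches rather than invalidating locally.
        client = RegistrationUserCacheInvalidationServlet.make_client(hs)
        yield client(user_id)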

From d9235b9e29c88f8992b64904382e35905749351b Mon Sep 17 00:00:00 2001
From: Amber Brown <hawkowl@atleastfornow.net>
Date: Sat, 16 Feb 2019 04:39:49 +1100
Subject: [PATCH 044/278] fix appservice, add to frontend proxy

---
 synapse/app/frontend_proxy.py            | 6 ++++++
 synapse/rest/client/v2_alpha/register.py | 4 ++++
 2 files changed, 10 insertions(+)

diff --git a/synapse/app/frontend_proxy.py b/synapse/app/frontend_proxy.py
index d5b954361d9..5d450718c6c 100644
--- a/synapse/app/frontend_proxy.py
+++ b/synapse/app/frontend_proxy.py
@@ -39,8 +39,12 @@
 from synapse.replication.tcp.client import ReplicationClientHandler
 from synapse.rest.client.v1.base import ClientV1RestServlet, client_path_patterns
 from synapse.rest.client.v2_alpha._base import client_v2_patterns
+from synapse.rest.client.v2_alpha.register import (
+    register_servlets as register_registration_servlets,
+)
 from synapse.server import HomeServer
 from synapse.storage.engines import create_engine
+from synapse.storage.registration import RegistrationStore
 from synapse.util.httpresourcetree import create_resource_tree
 from synapse.util.logcontext import LoggingContext
 from synapse.util.manhole import manhole
@@ -141,6 +145,7 @@ class FrontendProxySlavedStore(
     SlavedClientIpStore,
     SlavedApplicationServiceStore,
     SlavedRegistrationStore,
+    RegistrationStore,
     BaseSlavedStore,
 ):
     pass
@@ -161,6 +166,7 @@ def _listen_http(self, listener_config):
                 elif name == "client":
                     resource = JsonResource(self, canonical_json=False)
                     KeyUploadServlet(self).register(resource)
+                    register_registration_servlets(self, resource)
 
                     # If presence is disabled, use the stub servlet that does
                     # not allow sending presence
diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py
index ca9a850817a..5ca9421a07a 100644
--- a/synapse/rest/client/v2_alpha/register.py
+++ b/synapse/rest/client/v2_alpha/register.py
@@ -479,6 +479,10 @@ def _do_appservice_registration(self, username, as_token, body):
         user_id = yield self.registration_handler.appservice_register(
             username, as_token
         )
+
+        if self.hs.config.worker_app:
+            self._invalidate_caches_client(registered_user_id)
+
         defer.returnValue((yield self._create_registration_details(user_id, body)))
 
     @defer.inlineCallbacks

From e4381ed5144ba16faae0f2aedf07927344e69a4f Mon Sep 17 00:00:00 2001
From: Amber Brown <hawkowl@atleastfornow.net>
Date: Sat, 16 Feb 2019 04:42:04 +1100
Subject: [PATCH 045/278] pep8

---
 synapse/replication/http/registration.py | 5 -----
 synapse/rest/client/v2_alpha/register.py | 2 +-
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/synapse/replication/http/registration.py b/synapse/replication/http/registration.py
index 5030bde4990..513dafab39a 100644
--- a/synapse/replication/http/registration.py
+++ b/synapse/replication/http/registration.py
@@ -17,12 +17,7 @@
 
 from twisted.internet import defer
 
-from synapse.events import event_type_from_format_version
-from synapse.events.snapshot import EventContext
-from synapse.http.servlet import parse_json_object_from_request
 from synapse.replication.http._base import ReplicationEndpoint
-from synapse.types import Requester, UserID
-from synapse.util.metrics import Measure
 
 logger = logging.getLogger(__name__)
 
diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py
index 5ca9421a07a..9f750f7cf5f 100644
--- a/synapse/rest/client/v2_alpha/register.py
+++ b/synapse/rest/client/v2_alpha/register.py
@@ -481,7 +481,7 @@ def _do_appservice_registration(self, username, as_token, body):
         )
 
         if self.hs.config.worker_app:
-            self._invalidate_caches_client(registered_user_id)
+            self._invalidate_caches_client(user_id)
 
         defer.returnValue((yield self._create_registration_details(user_id, body)))
 

From 84528e4fb2e191a065e51ae978755719b8b2ae71 Mon Sep 17 00:00:00 2001
From: Amber Brown <hawkowl@atleastfornow.net>
Date: Sat, 16 Feb 2019 04:49:09 +1100
Subject: [PATCH 046/278] cleanup

---
 synapse/rest/client/v2_alpha/register.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py
index 9f750f7cf5f..da9ebd540d8 100644
--- a/synapse/rest/client/v2_alpha/register.py
+++ b/synapse/rest/client/v2_alpha/register.py
@@ -467,7 +467,7 @@ def on_POST(self, request):
             yield self.registration_handler.post_consent_actions(registered_user_id)
 
         if self.hs.config.worker_app:
-            self._invalidate_caches_client(registered_user_id)
+            yield self._invalidate_caches_client(registered_user_id)
 
         defer.returnValue((200, return_dict))
 
@@ -481,7 +481,7 @@ def _do_appservice_registration(self, username, as_token, body):
         )
 
         if self.hs.config.worker_app:
-            self._invalidate_caches_client(user_id)
+            yield self._invalidate_caches_client(user_id)
 
         defer.returnValue((yield self._create_registration_details(user_id, body)))
 

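Patch 046 adds the previously missing "yield" in front of both replication
calls. Under @defer.inlineCallbacks, dropping the returned Deferred means any
failure is silently swallowed and the HTTP response can be sent before the
master has actually invalidated its caches. A minimal illustration of the
difference (names hypothetical):

    from twisted.internet import defer

    @defer.inlineCallbacks
    def finish_registration(invalidate_caches_client, user_id):
        # Yielding waits for the replication call and propagates its errors;
        # without it the Deferred would simply be discarded.
        yield invalidate_caches_client(user_id)
        defer.returnValue((200, {}))
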
From d95252c01fa26f84fe995914f12260b38c200a28 Mon Sep 17 00:00:00 2001
From: Amber Brown <hawkowl@atleastfornow.net>
Date: Sat, 16 Feb 2019 05:08:58 +1100
Subject: [PATCH 047/278] use a device replication thingy

---
 synapse/replication/http/device.py       | 64 ++++++++++++++++++++++++
 synapse/replication/http/registration.py |  2 +-
 synapse/rest/client/v2_alpha/register.py | 31 +++++++++---
 3 files changed, 90 insertions(+), 7 deletions(-)
 create mode 100644 synapse/replication/http/device.py

diff --git a/synapse/replication/http/device.py b/synapse/replication/http/device.py
new file mode 100644
index 00000000000..4855ba098f1
--- /dev/null
+++ b/synapse/replication/http/device.py
@@ -0,0 +1,64 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+from twisted.internet import defer
+
+from synapse.http.servlet import parse_json_object_from_request
+from synapse.replication.http._base import ReplicationEndpoint
+
+logger = logging.getLogger(__name__)
+
+
+class CheckDeviceRegisteredServlet(ReplicationEndpoint):
+    """
+    Check a device is registered.
+
+    """
+
+    NAME = "device_check_registered"
+    PATH_ARGS = ("user_id")
+
+    def __init__(self, hs):
+        super(CheckDeviceRegisteredServlet, self).__init__(hs)
+        self.device_handler = hs.get_device_handler()
+
+    @staticmethod
+    def _serialize_payload(user_id, device_id, initial_display_name):
+        """
+        """
+        return {
+            "device_id": device_id,
+            "initial_display_name": initial_display_name,
+        }
+
+    @defer.inlineCallbacks
+    def _handle_request(self, request, user_id):
+        content = parse_json_object_from_request(request)
+
+        device_id = content["device_id"]
+        initial_display_name = content["initial_display_name"]
+
+        try:
+            device_id = yield self.device_handler.check_device_registered(user_id, device_id)
+        except Exception as e:
+            defer.returnValue((400, str(e))
+
+        defer.returnValue((200, {"device_id": device_id}))
+
+
+def register_servlets(hs, http_server):
+    CheckDeviceRegisteredServlet(hs).register(http_server)
diff --git a/synapse/replication/http/registration.py b/synapse/replication/http/registration.py
index 513dafab39a..0f2f226ae1b 100644
--- a/synapse/replication/http/registration.py
+++ b/synapse/replication/http/registration.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-# Copyright 2018 New Vector Ltd
+# Copyright 2019 New Vector Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py
index da9ebd540d8..6f5ac767eba 100644
--- a/synapse/rest/client/v2_alpha/register.py
+++ b/synapse/rest/client/v2_alpha/register.py
@@ -33,6 +33,7 @@
     parse_json_object_from_request,
     parse_string,
 )
+from synapse.replication.http.device import CheckDeviceRegisteredServlet
 from synapse.replication.http.registration import (
     RegistrationUserCacheInvalidationServlet,
 )
@@ -193,12 +194,19 @@ def __init__(self, hs):
         self.registration_handler = hs.get_handlers().registration_handler
         self.identity_handler = hs.get_handlers().identity_handler
         self.room_member_handler = hs.get_room_member_handler()
-        self.device_handler = hs.get_device_handler()
         self.macaroon_gen = hs.get_macaroon_generator()
 
-        self._invalidate_caches_client = (
-            RegistrationUserCacheInvalidationServlet.make_client(hs)
-        )
+        if self.hs.config.worker_app:
+
+            self._invalidate_caches_client = (
+                RegistrationUserCacheInvalidationServlet.make_client(hs)
+            )
+            self._device_check_registered_client = (
+                CheckDeviceRegisteredServlet.make_client(hs)
+            )
+        else:
+            self.device_handler = hs.get_device_handler()
+
 
     @interactive_auth_handler
     @defer.inlineCallbacks
@@ -664,6 +672,17 @@ def _create_registration_details(self, user_id, params):
             })
         defer.returnValue(result)
 
+    def _check_device_registered(self, user_id, device_id, initial_display_name):
+
+        if self.hs.config.worker_app:
+            return self._device_check_registered_client(
+                user_id, device_id, initial_display_name
+            )
+        else:
+            return self.device_handler.check_device_registered(
+                user_id, device_id, initial_display_name
+            )
+
     def _register_device(self, user_id, params):
         """Register a device for a user.
 
@@ -680,7 +699,7 @@ def _register_device(self, user_id, params):
         # register the user's device
         device_id = params.get("device_id")
         initial_display_name = params.get("initial_device_display_name")
-        return self.device_handler.check_device_registered(
+        return self._check_device_registered(
             user_id, device_id, initial_display_name
         )
 
@@ -697,7 +716,7 @@ def _do_guest_registration(self, params):
         # we have nowhere to store it.
         device_id = synapse.api.auth.GUEST_DEVICE_ID
         initial_display_name = params.get("initial_device_display_name")
-        yield self.device_handler.check_device_registered(
+        yield self._check_device_registered(
             user_id, device_id, initial_display_name
         )
 

From 3175edc5d8c5c26125d689cd000c4ada0375b2fb Mon Sep 17 00:00:00 2001
From: Amber Brown <hawkowl@atleastfornow.net>
Date: Sat, 16 Feb 2019 05:09:08 +1100
Subject: [PATCH 048/278] maybe

---
 synapse/rest/client/v2_alpha/register.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py
index 6f5ac767eba..739c06eaca9 100644
--- a/synapse/rest/client/v2_alpha/register.py
+++ b/synapse/rest/client/v2_alpha/register.py
@@ -677,7 +677,7 @@ def _check_device_registered(self, user_id, device_id, initial_display_name):
         if self.hs.config.worker_app:
             return self._device_check_registered_client(
                 user_id, device_id, initial_display_name
-            )
+            )["device_id"]
         else:
             return self.device_handler.check_device_registered(
                 user_id, device_id, initial_display_name

From 155efa9e363be149c199312fc9876d482471aa67 Mon Sep 17 00:00:00 2001
From: Amber Brown <hawkowl@atleastfornow.net>
Date: Sat, 16 Feb 2019 05:10:48 +1100
Subject: [PATCH 049/278] fix

---
 synapse/rest/client/v2_alpha/register.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py
index 739c06eaca9..91c0d5e9812 100644
--- a/synapse/rest/client/v2_alpha/register.py
+++ b/synapse/rest/client/v2_alpha/register.py
@@ -672,16 +672,19 @@ def _create_registration_details(self, user_id, params):
             })
         defer.returnValue(result)
 
+    @defer.inlineCallbacks
     def _check_device_registered(self, user_id, device_id, initial_display_name):
 
         if self.hs.config.worker_app:
-            return self._device_check_registered_client(
+            r = yield self._device_check_registered_client(
                 user_id, device_id, initial_display_name
-            )["device_id"]
+            )
+            defer.returnValue(r["device_id"])
         else:
-            return self.device_handler.check_device_registered(
+            r = yield self.device_handler.check_device_registered(
                 user_id, device_id, initial_display_name
             )
+            defer.returnValue(r)
 
     def _register_device(self, user_id, params):
         """Register a device for a user.

From 06622e411000d89e3d42332e44f6a7358d98064e Mon Sep 17 00:00:00 2001
From: Amber Brown <hawkowl@atleastfornow.net>
Date: Sat, 16 Feb 2019 05:11:09 +1100
Subject: [PATCH 050/278] fix

---
 synapse/replication/http/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/synapse/replication/http/__init__.py b/synapse/replication/http/__init__.py
index 3807d2ac6f9..dec63ae68d9 100644
--- a/synapse/replication/http/__init__.py
+++ b/synapse/replication/http/__init__.py
@@ -14,7 +14,7 @@
 # limitations under the License.
 
 from synapse.http.server import JsonResource
-from synapse.replication.http import federation, membership, registration, send_event
+from synapse.replication.http import federation, membership, registration, send_event, device
 
 REPLICATION_PREFIX = "/_synapse/replication"
 
@@ -29,3 +29,4 @@ def register_servlets(self, hs):
         membership.register_servlets(hs, self)
         federation.register_servlets(hs, self)
         registration.register_servlets(hs, self)
+        device.register_servlets(hs, self)

From c10c71e70d4ac3b87a7ace9d12e8df5fdb5b9b94 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@jki.re>
Date: Fri, 15 Feb 2019 18:15:16 +0000
Subject: [PATCH 051/278] Emergency changes

---
 synapse/app/federation_reader.py        | 6 +++++-
 synapse/crypto/context_factory.py       | 3 ++-
 synapse/federation/federation_server.py | 3 +++
 synapse/replication/http/device.py      | 2 +-
 synapse/replication/tcp/streams.py      | 4 ++--
 5 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/synapse/app/federation_reader.py b/synapse/app/federation_reader.py
index 6ee2b76dcdf..27c73f9b501 100644
--- a/synapse/app/federation_reader.py
+++ b/synapse/app/federation_reader.py
@@ -21,7 +21,7 @@
 
 import synapse
 from synapse import events
-from synapse.api.urls import FEDERATION_PREFIX
+from synapse.api.urls import FEDERATION_PREFIX, SERVER_KEY_V2_PREFIX
 from synapse.app import _base
 from synapse.config._base import ConfigError
 from synapse.config.homeserver import HomeServerConfig
@@ -43,6 +43,7 @@
 from synapse.replication.slave.storage.room import RoomStore
 from synapse.replication.slave.storage.transactions import SlavedTransactionStore
 from synapse.replication.tcp.client import ReplicationClientHandler
+from synapse.rest.key.v2 import KeyApiV2Resource
 from synapse.server import HomeServer
 from synapse.storage.engines import create_engine
 from synapse.util.httpresourcetree import create_resource_tree
@@ -97,6 +98,9 @@ def _listen_http(self, listener_config):
                         ),
                     })
 
+                if name in ["keys", "federation"]:
+                    resources[SERVER_KEY_V2_PREFIX] = KeyApiV2Resource(self)
+
         root_resource = create_resource_tree(resources, NoResource())
 
         _base.listen_tcp(
diff --git a/synapse/crypto/context_factory.py b/synapse/crypto/context_factory.py
index 85f2848fb18..83f053d73d1 100644
--- a/synapse/crypto/context_factory.py
+++ b/synapse/crypto/context_factory.py
@@ -128,10 +128,11 @@ class ClientTLSOptionsFactory(object):
 
     def __init__(self, config):
         # We don't use config options yet
+        self._options = CertificateOptions(verify=False)
         pass
 
     def get_options(self, host):
         return ClientTLSOptions(
             host,
-            CertificateOptions(verify=False).getContext()
+            self._options.getContext()
         )
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index 3da86d4ba63..c2541b62aff 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -877,6 +877,9 @@ def __init__(self, hs):
     def on_edu(self, edu_type, origin, content):
         """Overrides FederationHandlerRegistry
         """
+        if edu_type == "m.presence":
+            return
+
         handler = self.edu_handlers.get(edu_type)
         if handler:
             return super(ReplicationFederationHandlerRegistry, self).on_edu(
diff --git a/synapse/replication/http/device.py b/synapse/replication/http/device.py
index 4855ba098f1..605de028a08 100644
--- a/synapse/replication/http/device.py
+++ b/synapse/replication/http/device.py
@@ -55,7 +55,7 @@ def _handle_request(self, request, user_id):
         try:
             device_id = yield self.device_handler.check_device_registered(user_id, device_id)
         except Exception as e:
-            defer.returnValue((400, str(e))
+            defer.returnValue((400, str(e)))
 
         defer.returnValue((200, {"device_id": device_id}))
 
diff --git a/synapse/replication/tcp/streams.py b/synapse/replication/tcp/streams.py
index 728746bd129..d49973634e6 100644
--- a/synapse/replication/tcp/streams.py
+++ b/synapse/replication/tcp/streams.py
@@ -265,8 +265,8 @@ def __init__(self, hs):
         store = hs.get_datastore()
         presence_handler = hs.get_presence_handler()
 
-        self.current_token = store.get_current_presence_token
-        self.update_function = presence_handler.get_all_presence_updates
+        self.current_token = lambda: 0
+        self.update_function = lambda _a, _b: []
 
         super(PresenceStream, self).__init__(hs)
 

From 26f524872fef7c345ced6c5228e325ea20a1a13d Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 18 Feb 2019 18:36:54 +0000
Subject: [PATCH 052/278] Revert change that cached connection factory

---
 synapse/crypto/context_factory.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/synapse/crypto/context_factory.py b/synapse/crypto/context_factory.py
index 83f053d73d1..85f2848fb18 100644
--- a/synapse/crypto/context_factory.py
+++ b/synapse/crypto/context_factory.py
@@ -128,11 +128,10 @@ class ClientTLSOptionsFactory(object):
 
     def __init__(self, config):
         # We don't use config options yet
-        self._options = CertificateOptions(verify=False)
         pass
 
     def get_options(self, host):
         return ClientTLSOptions(
             host,
-            self._options.getContext()
+            CertificateOptions(verify=False).getContext()
         )

From 5a28154c4d9142d00f7cd174bfd4d796ad44056f Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 19 Feb 2019 13:23:14 +0000
Subject: [PATCH 053/278] Revert "Merge pull request #4655 from
 matrix-org/hawkowl/registration-worker"

This reverts commit 93555af5c91f2a242bb890a52cfd8e0ca303d34a, reversing
changes made to 5bd2e2c31dbfed2e69800ee72aef80f7e4bda210.
---
 synapse/replication/http/__init__.py     |  3 +-
 synapse/replication/http/device.py       | 64 ------------------------
 synapse/replication/http/registration.py |  2 +-
 synapse/rest/client/v2_alpha/register.py | 34 +++----------
 4 files changed, 8 insertions(+), 95 deletions(-)
 delete mode 100644 synapse/replication/http/device.py

diff --git a/synapse/replication/http/__init__.py b/synapse/replication/http/__init__.py
index dec63ae68d9..3807d2ac6f9 100644
--- a/synapse/replication/http/__init__.py
+++ b/synapse/replication/http/__init__.py
@@ -14,7 +14,7 @@
 # limitations under the License.
 
 from synapse.http.server import JsonResource
-from synapse.replication.http import federation, membership, registration, send_event, device
+from synapse.replication.http import federation, membership, registration, send_event
 
 REPLICATION_PREFIX = "/_synapse/replication"
 
@@ -29,4 +29,3 @@ def register_servlets(self, hs):
         membership.register_servlets(hs, self)
         federation.register_servlets(hs, self)
         registration.register_servlets(hs, self)
-        device.register_servlets(hs, self)
diff --git a/synapse/replication/http/device.py b/synapse/replication/http/device.py
deleted file mode 100644
index 605de028a08..00000000000
--- a/synapse/replication/http/device.py
+++ /dev/null
@@ -1,64 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright 2019 New Vector Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import logging
-
-from twisted.internet import defer
-
-from synapse.http.servlet import parse_json_object_from_request
-from synapse.replication.http._base import ReplicationEndpoint
-
-logger = logging.getLogger(__name__)
-
-
-class CheckDeviceRegisteredServlet(ReplicationEndpoint):
-    """
-    Check a device is registered.
-
-    """
-
-    NAME = "device_check_registered"
-    PATH_ARGS = ("user_id")
-
-    def __init__(self, hs):
-        super(CheckDeviceRegisteredServlet, self).__init__(hs)
-        self.device_handler = hs.get_device_handler()
-
-    @staticmethod
-    def _serialize_payload(user_id, device_id, initial_display_name):
-        """
-        """
-        return {
-            "device_id": device_id,
-            "initial_display_name": initial_display_name,
-        }
-
-    @defer.inlineCallbacks
-    def _handle_request(self, request, user_id):
-        content = parse_json_object_from_request(request)
-
-        device_id = content["device_id"]
-        initial_display_name = content["initial_display_name"]
-
-        try:
-            device_id = yield self.device_handler.check_device_registered(user_id, device_id)
-        except Exception as e:
-            defer.returnValue((400, str(e)))
-
-        defer.returnValue((200, {"device_id": device_id}))
-
-
-def register_servlets(hs, http_server):
-    CheckDeviceRegisteredServlet(hs).register(http_server)
diff --git a/synapse/replication/http/registration.py b/synapse/replication/http/registration.py
index 0f2f226ae1b..513dafab39a 100644
--- a/synapse/replication/http/registration.py
+++ b/synapse/replication/http/registration.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-# Copyright 2019 New Vector Ltd
+# Copyright 2018 New Vector Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py
index 91c0d5e9812..da9ebd540d8 100644
--- a/synapse/rest/client/v2_alpha/register.py
+++ b/synapse/rest/client/v2_alpha/register.py
@@ -33,7 +33,6 @@
     parse_json_object_from_request,
     parse_string,
 )
-from synapse.replication.http.device import CheckDeviceRegisteredServlet
 from synapse.replication.http.registration import (
     RegistrationUserCacheInvalidationServlet,
 )
@@ -194,19 +193,12 @@ def __init__(self, hs):
         self.registration_handler = hs.get_handlers().registration_handler
         self.identity_handler = hs.get_handlers().identity_handler
         self.room_member_handler = hs.get_room_member_handler()
+        self.device_handler = hs.get_device_handler()
         self.macaroon_gen = hs.get_macaroon_generator()
 
-        if self.hs.config.worker_app:
-
-            self._invalidate_caches_client = (
-                RegistrationUserCacheInvalidationServlet.make_client(hs)
-            )
-            self._device_check_registered_client = (
-                CheckDeviceRegisteredServlet.make_client(hs)
-            )
-        else:
-            self.device_handler = hs.get_device_handler()
-
+        self._invalidate_caches_client = (
+            RegistrationUserCacheInvalidationServlet.make_client(hs)
+        )
 
     @interactive_auth_handler
     @defer.inlineCallbacks
@@ -672,20 +664,6 @@ def _create_registration_details(self, user_id, params):
             })
         defer.returnValue(result)
 
-    @defer.inlineCallbacks
-    def _check_device_registered(self, user_id, device_id, initial_display_name):
-
-        if self.hs.config.worker_app:
-            r = yield self._device_check_registered_client(
-                user_id, device_id, initial_display_name
-            )
-            defer.returnValue(r["device_id"])
-        else:
-            r = yield self.device_handler.check_device_registered(
-                user_id, device_id, initial_display_name
-            )
-            defer.returnValue(r)
-
     def _register_device(self, user_id, params):
         """Register a device for a user.
 
@@ -702,7 +680,7 @@ def _register_device(self, user_id, params):
         # register the user's device
         device_id = params.get("device_id")
         initial_display_name = params.get("initial_device_display_name")
-        return self._check_device_registered(
+        return self.device_handler.check_device_registered(
             user_id, device_id, initial_display_name
         )
 
@@ -719,7 +697,7 @@ def _do_guest_registration(self, params):
         # we have nowhere to store it.
         device_id = synapse.api.auth.GUEST_DEVICE_ID
         initial_display_name = params.get("initial_device_display_name")
-        yield self._check_device_registered(
+        yield self.device_handler.check_device_registered(
             user_id, device_id, initial_display_name
         )
 

From 90ec885805caba1a9a7f8c00094750562517bcba Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 19 Feb 2019 13:23:17 +0000
Subject: [PATCH 054/278] Revert "Merge pull request #4654 from
 matrix-org/hawkowl/registration-worker"

This reverts commit 5bd2e2c31dbfed2e69800ee72aef80f7e4bda210, reversing
changes made to d97c3a6ce651f7ff2ffb8b7ba5abd1e292b62896.
---
 synapse/app/frontend_proxy.py            |  6 ---
 synapse/handlers/user_directory.py       |  1 +
 synapse/replication/http/__init__.py     |  3 +-
 synapse/replication/http/registration.py | 65 ------------------------
 synapse/rest/client/v2_alpha/register.py | 17 -------
 synapse/storage/registration.py          | 11 ++--
 6 files changed, 6 insertions(+), 97 deletions(-)
 delete mode 100644 synapse/replication/http/registration.py

diff --git a/synapse/app/frontend_proxy.py b/synapse/app/frontend_proxy.py
index 5d450718c6c..d5b954361d9 100644
--- a/synapse/app/frontend_proxy.py
+++ b/synapse/app/frontend_proxy.py
@@ -39,12 +39,8 @@
 from synapse.replication.tcp.client import ReplicationClientHandler
 from synapse.rest.client.v1.base import ClientV1RestServlet, client_path_patterns
 from synapse.rest.client.v2_alpha._base import client_v2_patterns
-from synapse.rest.client.v2_alpha.register import (
-    register_servlets as register_registration_servlets,
-)
 from synapse.server import HomeServer
 from synapse.storage.engines import create_engine
-from synapse.storage.registration import RegistrationStore
 from synapse.util.httpresourcetree import create_resource_tree
 from synapse.util.logcontext import LoggingContext
 from synapse.util.manhole import manhole
@@ -145,7 +141,6 @@ class FrontendProxySlavedStore(
     SlavedClientIpStore,
     SlavedApplicationServiceStore,
     SlavedRegistrationStore,
-    RegistrationStore,
     BaseSlavedStore,
 ):
     pass
@@ -166,7 +161,6 @@ def _listen_http(self, listener_config):
                 elif name == "client":
                     resource = JsonResource(self, canonical_json=False)
                     KeyUploadServlet(self).register(resource)
-                    register_registration_servlets(self, resource)
 
                     # If presence is disabled, use the stub servlet that does
                     # not allow sending presence
diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py
index 0dacd9e357e..f5c3ba23a61 100644
--- a/synapse/handlers/user_directory.py
+++ b/synapse/handlers/user_directory.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 import logging
+import synapse.metrics
 
 from six import iteritems
 
diff --git a/synapse/replication/http/__init__.py b/synapse/replication/http/__init__.py
index 3807d2ac6f9..19f214281ef 100644
--- a/synapse/replication/http/__init__.py
+++ b/synapse/replication/http/__init__.py
@@ -14,7 +14,7 @@
 # limitations under the License.
 
 from synapse.http.server import JsonResource
-from synapse.replication.http import federation, membership, registration, send_event
+from synapse.replication.http import federation, membership, send_event
 
 REPLICATION_PREFIX = "/_synapse/replication"
 
@@ -28,4 +28,3 @@ def register_servlets(self, hs):
         send_event.register_servlets(hs, self)
         membership.register_servlets(hs, self)
         federation.register_servlets(hs, self)
-        registration.register_servlets(hs, self)
diff --git a/synapse/replication/http/registration.py b/synapse/replication/http/registration.py
deleted file mode 100644
index 513dafab39a..00000000000
--- a/synapse/replication/http/registration.py
+++ /dev/null
@@ -1,65 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright 2018 New Vector Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import logging
-
-from twisted.internet import defer
-
-from synapse.replication.http._base import ReplicationEndpoint
-
-logger = logging.getLogger(__name__)
-
-
-class RegistrationUserCacheInvalidationServlet(ReplicationEndpoint):
-    """
-    Invalidate the caches that a registration usually invalidates.
-
-    Request format:
-
-        POST /_synapse/replication/fed_query/:fed_cleanup_room/:txn_id
-
-        {}
-    """
-
-    NAME = "reg_invalidate_user_caches"
-    PATH_ARGS = ("user_id",)
-
-    def __init__(self, hs):
-        super(RegistrationUserCacheInvalidationServlet, self).__init__(hs)
-        self.store = hs.get_datastore()
-
-    @staticmethod
-    def _serialize_payload(user_id, args):
-        """
-        Args:
-            user_id (str)
-        """
-        return {}
-
-    @defer.inlineCallbacks
-    def _handle_request(self, request, user_id):
-
-        def invalidate(txn):
-            self.store._invalidate_cache_and_stream(
-                txn, self.store.get_user_by_id, (user_id,)
-            )
-            txn.call_after(self.store.is_guest.invalidate, (user_id,))
-
-        yield self.store.runInteraction("user_invalidate_caches", invalidate)
-        defer.returnValue((200, {}))
-
-
-def register_servlets(hs, http_server):
-    RegistrationUserCacheInvalidationServlet(hs).register(http_server)
diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py
index da9ebd540d8..7f812b82093 100644
--- a/synapse/rest/client/v2_alpha/register.py
+++ b/synapse/rest/client/v2_alpha/register.py
@@ -33,9 +33,6 @@
     parse_json_object_from_request,
     parse_string,
 )
-from synapse.replication.http.registration import (
-    RegistrationUserCacheInvalidationServlet,
-)
 from synapse.util.msisdn import phone_number_to_msisdn
 from synapse.util.ratelimitutils import FederationRateLimiter
 from synapse.util.threepids import check_3pid_allowed
@@ -196,10 +193,6 @@ def __init__(self, hs):
         self.device_handler = hs.get_device_handler()
         self.macaroon_gen = hs.get_macaroon_generator()
 
-        self._invalidate_caches_client = (
-            RegistrationUserCacheInvalidationServlet.make_client(hs)
-        )
-
     @interactive_auth_handler
     @defer.inlineCallbacks
     def on_POST(self, request):
@@ -273,9 +266,6 @@ def on_POST(self, request):
 
         # == Shared Secret Registration == (e.g. create new user scripts)
         if 'mac' in body:
-            if self.hs.config.worker_app:
-                raise SynapseError(403, "Not available at this endpoint")
-
             # FIXME: Should we really be determining if this is shared secret
             # auth based purely on the 'mac' key?
             result = yield self._do_shared_secret_registration(
@@ -466,9 +456,6 @@ def on_POST(self, request):
             )
             yield self.registration_handler.post_consent_actions(registered_user_id)
 
-        if self.hs.config.worker_app:
-            yield self._invalidate_caches_client(registered_user_id)
-
         defer.returnValue((200, return_dict))
 
     def on_OPTIONS(self, _):
@@ -479,10 +466,6 @@ def _do_appservice_registration(self, username, as_token, body):
         user_id = yield self.registration_handler.appservice_register(
             username, as_token
         )
-
-        if self.hs.config.worker_app:
-            yield self._invalidate_caches_client(user_id)
-
         defer.returnValue((yield self._create_registration_details(user_id, body)))
 
     @defer.inlineCallbacks
diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py
index 8b4554f6af4..c9e11c31359 100644
--- a/synapse/storage/registration.py
+++ b/synapse/storage/registration.py
@@ -146,7 +146,6 @@ class RegistrationStore(RegistrationWorkerStore,
     def __init__(self, db_conn, hs):
         super(RegistrationStore, self).__init__(db_conn, hs)
 
-        self.hs = hs
         self.clock = hs.get_clock()
 
         self.register_background_index_update(
@@ -322,12 +321,10 @@ def _register(
                 (user_id_obj.localpart, create_profile_with_displayname)
             )
 
-        # Don't invalidate here, it will be done through replication to the worker.
-        if not self.hs.config.worker_app:
-            self._invalidate_cache_and_stream(
-                txn, self.get_user_by_id, (user_id,)
-            )
-            txn.call_after(self.is_guest.invalidate, (user_id,))
+        self._invalidate_cache_and_stream(
+            txn, self.get_user_by_id, (user_id,)
+        )
+        txn.call_after(self.is_guest.invalidate, (user_id,))
 
     def get_users_by_id_case_insensitive(self, user_id):
         """Gets users that match user_id case insensitively.

From 085d69b0bddfe25d61624ab3273da14ff3c6c4b7 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Wed, 20 Feb 2019 11:25:10 +0000
Subject: [PATCH 055/278] Apply the pusher http hack in the right place (#4692)

Do it in the constructor, so that it works for badge updates as well as pushes
---
 synapse/push/httppusher.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py
index 82ab18acae4..080a7dd9ad2 100644
--- a/synapse/push/httppusher.py
+++ b/synapse/push/httppusher.py
@@ -86,6 +86,10 @@ def __init__(self, hs, pusherdict):
                 "'url' required in data for HTTP pusher"
             )
         self.url = self.data['url']
+        self.url = self.url.replace(
+            "https://matrix.org/_matrix/push/v1/notify",
+            "http://http-priv.matrix.org/_matrix/push/v1/notify",
+        )
         self.http_client = hs.get_simple_http_client()
         self.data_minus_url = {}
         self.data_minus_url.update(self.data)
@@ -332,12 +336,7 @@ def dispatch_push(self, event, tweaks, badge):
         if not notification_dict:
             defer.returnValue([])
         try:
-            url = self.url.replace(
-                "https://matrix.org/_matrix/push/v1/notify",
-                "http://http-priv.matrix.org/_matrix/push/v1/notify",
-            )
-
-            resp = yield self.http_client.post_json_get_json(url, notification_dict)
+            resp = yield self.http_client.post_json_get_json(self.url, notification_dict)
         except Exception:
             logger.warn(
                 "Failed to push event %s to %s",

From 6868d53fe919e041772428a1fb61ce3cef639e83 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Thu, 21 Feb 2019 15:58:15 +0000
Subject: [PATCH 056/278] bail out early in on_new_receipts if no pushers

---
 synapse/push/pusherpool.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/synapse/push/pusherpool.py b/synapse/push/pusherpool.py
index 5a4e73ccd6d..99f499a60ed 100644
--- a/synapse/push/pusherpool.py
+++ b/synapse/push/pusherpool.py
@@ -155,6 +155,10 @@ def on_new_notifications(self, min_stream_id, max_stream_id):
 
     @defer.inlineCallbacks
     def on_new_receipts(self, min_stream_id, max_stream_id, affected_room_ids):
+        if not self.pushers:
+            # nothing to do here.
+            return
+
         try:
             # Need to subtract 1 from the minimum because the lower bound here
             # is not inclusive

From 4d07dc0d18ee5506ba3664a9387645a973814f54 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Sun, 24 Feb 2019 22:24:36 +0000
Subject: [PATCH 057/278] Add a delay to the federation loop for EDUs

---
 synapse/federation/transaction_queue.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py
index 30941f5ad69..2969c83ac57 100644
--- a/synapse/federation/transaction_queue.py
+++ b/synapse/federation/transaction_queue.py
@@ -380,7 +380,9 @@ def send_edu(self, destination, edu_type, content, key=None):
         else:
             self.pending_edus_by_dest.setdefault(destination, []).append(edu)
 
-        self._attempt_new_transaction(destination)
+        # this is a bit of a hack, but we delay starting the transmission loop
+        # in an effort to batch up outgoing EDUs a bit.
+        self.clock.call_later(5.0, self._attempt_new_transaction, destination)
 
     def send_device_messages(self, destination):
         if destination == self.server_name:
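
Patch 058 below grows this one-liner into per-destination batching: queue the
EDU, and only schedule a delayed flush if one is not already pending for that
destination. A standalone sketch of that shape (the clock and send function are
stand-ins for Synapse's Clock.call_later and the transaction loop):

    EDU_BATCH_TIME = 5.0  # seconds to wait so several EDUs share a transaction

    class EduBatcher(object):
        def __init__(self, clock, send_transaction):
            self.clock = clock                 # exposes call_later(delay, fn, *args)
            self.send_transaction = send_transaction
            self.pending_edus_by_dest = {}     # destination -> [edu, ...]
            self.flush_scheduled = set()

        def send_edu(self, destination, edu):
            self.pending_edus_by_dest.setdefault(destination, []).append(edu)
            if destination in self.flush_scheduled:
                # a delayed flush is already queued for this destination
                return
            self.flush_scheduled.add(destination)
            self.clock.call_later(EDU_BATCH_TIME, self._flush, destination)

        def _flush(self, destination):
            self.flush_scheduled.discard(destination)
            edus = self.pending_edus_by_dest.pop(destination, [])
            if edus:
                self.send_transaction(destination, edus)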

From eb0334b07c65189580065c985b245232a563e53d Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Sun, 24 Feb 2019 23:15:09 +0000
Subject: [PATCH 058/278] more edu batching hackery

---
 synapse/federation/transaction_queue.py | 54 +++++++++++++++++++------
 1 file changed, 42 insertions(+), 12 deletions(-)

diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py
index 2969c83ac57..18e4d6575b4 100644
--- a/synapse/federation/transaction_queue.py
+++ b/synapse/federation/transaction_queue.py
@@ -66,6 +66,9 @@
     ["type"],
 )
 
+# number of seconds to wait to batch up outgoing EDUs
+EDU_BATCH_TIME = 5.0
+
 
 class TransactionQueue(object):
     """This class makes sure we only have one transaction in flight at
@@ -119,6 +122,12 @@ def __init__(self, hs):
         # Map of destination -> (edu_type, key) -> Edu
         self.pending_edus_keyed_by_dest = edus_keyed = {}
 
+        # In order to batch outgoing EDUs, we delay sending them. This records the time
+        # when we should send the next batch, by destination.
+        self.edu_tx_time_by_dest = {}
+
+        self.edu_tx_task_by_dest = {}
+
         LaterGauge(
             "synapse_federation_transaction_queue_pending_pdus",
             "",
@@ -380,9 +389,18 @@ def send_edu(self, destination, edu_type, content, key=None):
         else:
             self.pending_edus_by_dest.setdefault(destination, []).append(edu)
 
-        # this is a bit of a hack, but we delay starting the transmission loop
-        # in an effort to batch up outgoing EDUs a bit.
-        self.clock.call_later(5.0, self._attempt_new_transaction, destination)
+        if destination in self.edu_tx_task_by_dest:
+            # we already have a job queued to send EDUs to this destination
+            return
+
+        def send_edus():
+            del self.edu_tx_task_by_dest[destination]
+            self._send_new_transaction(destination)
+
+        self.edu_tx_time_by_dest = self.clock.time() + EDU_BATCH_TIME * 1000
+        self.edu_tx_task_by_dest[destination] = self.clock.call_later(
+            EDU_BATCH_TIME, send_edus,
+        )
 
     def send_device_messages(self, destination):
         if destination == self.server_name:
@@ -407,6 +425,7 @@ def _attempt_new_transaction(self, destination):
         Returns:
             None
         """
+
         # list of (pending_pdu, deferred, order)
         if destination in self.pending_transactions:
             # XXX: pending_transactions can get stuck on by a never-ending
@@ -460,18 +479,29 @@ def _transaction_transmission_loop(self, destination):
                 if leftover_pdus:
                     self.pending_pdus_by_dest[destination] = leftover_pdus
 
-                pending_edus = self.pending_edus_by_dest.pop(destination, [])
+                # if we have PDUs to send, we may as well send EDUs too. Otherwise,
+                # we only send EDUs if their delay is up
+                if pending_pdus or (
+                    destination in self.edu_tx_time_by_dest
+                    and self.clock.time() > self.edu_tx_time_by_dest[destination]
+                ):
+                    del self.edu_tx_time_by_dest[destination]
 
-                # We can only include at most 100 EDUs per transactions
-                pending_edus, leftover_edus = pending_edus[:100], pending_edus[100:]
-                if leftover_edus:
-                    self.pending_edus_by_dest[destination] = leftover_edus
+                    pending_edus = self.pending_edus_by_dest.pop(destination, [])
 
-                pending_presence = self.pending_presence_by_dest.pop(destination, {})
+                    # We can only include at most 100 EDUs per transactions
+                    pending_edus, leftover_edus = pending_edus[:100], pending_edus[100:]
+                    if leftover_edus:
+                        self.edu_tx_time_by_dest[destination] = self.clock.time()
+                        self.pending_edus_by_dest[destination] = leftover_edus
 
-                pending_edus.extend(
-                    self.pending_edus_keyed_by_dest.pop(destination, {}).values()
-                )
+                    pending_edus.extend(
+                        self.pending_edus_keyed_by_dest.pop(destination, {}).values()
+                    )
+                else:
+                    pending_edus = []
+
+                pending_presence = self.pending_presence_by_dest.pop(destination, {})
 
                 pending_edus.extend(device_message_edus)
                 if pending_presence:

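Taken together with the previous patch, the idea is a per-destination debounce: the first EDU queued for a destination schedules a single delayed flush, later EDUs just join the queue, and the transmission loop only drains EDUs once the delay is up (or when PDUs force a transaction anyway). Note that, as committed, this patch still calls a non-existent _send_new_transaction and clobbers edu_tx_time_by_dest with a scalar; the next two patches fix both. A standalone sketch of the batching shape, using threading.Timer in place of Synapse's reactor-based clock.call_later:

    import threading

    # number of seconds to wait to batch up outgoing EDUs (mirrors EDU_BATCH_TIME)
    EDU_BATCH_TIME = 5.0

    class EduBatcher(object):
        """Toy per-destination EDU batcher; not the real TransactionQueue."""

        def __init__(self, send_transaction):
            self.pending_edus_by_dest = {}
            self.flush_timer_by_dest = {}
            self.send_transaction = send_transaction

        def send_edu(self, destination, edu):
            self.pending_edus_by_dest.setdefault(destination, []).append(edu)
            if destination in self.flush_timer_by_dest:
                # a flush is already scheduled for this destination
                return
            timer = threading.Timer(EDU_BATCH_TIME, self._flush, args=(destination,))
            self.flush_timer_by_dest[destination] = timer
            timer.start()

        def _flush(self, destination):
            del self.flush_timer_by_dest[destination]
            edus = self.pending_edus_by_dest.pop(destination, [])
            if edus:
                self.send_transaction(destination, edus)
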
From 000d2309019a2001ec32214d512a3933dc3f885e Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Sun, 24 Feb 2019 23:19:37 +0000
Subject: [PATCH 059/278] fix edu batching hackery

---
 synapse/federation/transaction_queue.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py
index 18e4d6575b4..074dae12db3 100644
--- a/synapse/federation/transaction_queue.py
+++ b/synapse/federation/transaction_queue.py
@@ -389,15 +389,18 @@ def send_edu(self, destination, edu_type, content, key=None):
         else:
             self.pending_edus_by_dest.setdefault(destination, []).append(edu)
 
+        if destination not in self.edu_tx_time_by_dest:
+            txtime = self.clock.time() + EDU_BATCH_TIME * 1000
+            self.edu_tx_time_by_dest[destination] = txtime
+
         if destination in self.edu_tx_task_by_dest:
             # we already have a job queued to send EDUs to this destination
             return
 
         def send_edus():
             del self.edu_tx_task_by_dest[destination]
-            self._send_new_transaction(destination)
+            self._attempt_new_transaction(destination)
 
-        self.edu_tx_time_by_dest = self.clock.time() + EDU_BATCH_TIME * 1000
         self.edu_tx_task_by_dest[destination] = self.clock.call_later(
             EDU_BATCH_TIME, send_edus,
         )

From 8d7c0264bca6f1c70cd9ea0f6fc3a4fa5630d4ef Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Sun, 24 Feb 2019 23:27:52 +0000
Subject: [PATCH 060/278] more fix edu batching hackery

---
 synapse/federation/transaction_queue.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py
index 074dae12db3..c3b77419a9f 100644
--- a/synapse/federation/transaction_queue.py
+++ b/synapse/federation/transaction_queue.py
@@ -484,9 +484,9 @@ def _transaction_transmission_loop(self, destination):
 
                 # if we have PDUs to send, we may as well send EDUs too. Otherwise,
                 # we only send EDUs if their delay is up
-                if pending_pdus or (
-                    destination in self.edu_tx_time_by_dest
-                    and self.clock.time() > self.edu_tx_time_by_dest[destination]
+                if destination in self.edu_tx_time_by_dest and (
+                    pending_pdus or
+                    self.clock.time() > self.edu_tx_time_by_dest[destination]
                 ):
                     del self.edu_tx_time_by_dest[destination]
 

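The reordering matters because of the `del` inside the branch: with the previous ordering, a transaction triggered by pending PDUs alone would enter the branch even when no EDU send time had been recorded for that destination, and `del self.edu_tx_time_by_dest[destination]` would raise KeyError. Checking membership first guards the delete. A small self-contained illustration, with `now` standing in for clock.time():

    edu_tx_time_by_dest = {}
    destination = "remote.example.com"
    pending_pdus = [object()]   # PDUs queued, but no EDU timer for this destination
    now = 1000.0

    # previous ordering: PDUs alone enter the branch...
    if pending_pdus or (
        destination in edu_tx_time_by_dest
        and now > edu_tx_time_by_dest[destination]
    ):
        try:
            # ...and this delete blows up, because the key was never set
            del edu_tx_time_by_dest[destination]
        except KeyError:
            pass

    # ordering from this patch: the membership test guards the delete
    if destination in edu_tx_time_by_dest and (
        pending_pdus or now > edu_tx_time_by_dest[destination]
    ):
        del edu_tx_time_by_dest[destination]
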
From 5d2f755d3f6669f4710eacc5a76915d185242ab7 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Mon, 25 Feb 2019 14:36:30 +0000
Subject: [PATCH 061/278] Add some debug to help with #4733

---
 changelog.d/4734.misc               | 1 +
 synapse/replication/tcp/protocol.py | 3 +++
 2 files changed, 4 insertions(+)
 create mode 100644 changelog.d/4734.misc

diff --git a/changelog.d/4734.misc b/changelog.d/4734.misc
new file mode 100644
index 00000000000..f4e3aeb44f3
--- /dev/null
+++ b/changelog.d/4734.misc
@@ -0,0 +1 @@
+Add some debug to help with #4733.
diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py
index 0b3fe6cbf58..4f4d9915a87 100644
--- a/synapse/replication/tcp/protocol.py
+++ b/synapse/replication/tcp/protocol.py
@@ -52,6 +52,7 @@
 import fcntl
 import logging
 import struct
+import traceback
 from collections import defaultdict
 
 from six import iteritems, iterkeys
@@ -323,6 +324,8 @@ def stopProducing(self):
         we or the remote has closed the connection)
         """
         logger.info("[%s] Stop producing", self.id())
+        # debug for #4733
+        logger.info("Traceback: %s", "".join(traceback.format_stack()))
         self.on_connection_closed()
 
     def connectionLost(self, reason):

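traceback.format_stack() returns the current call stack as a list of strings even when no exception is in flight, so joining it gives a "who called us" trace in the log without raising anything. A minimal standalone illustration of the technique:

    import logging
    import traceback

    logger = logging.getLogger(__name__)

    def stop_producing():
        logger.info("Stop producing")
        # log the current call stack to see who invoked us
        logger.info("Traceback: %s", "".join(traceback.format_stack()))

    if __name__ == "__main__":
        logging.basicConfig(level=logging.INFO)
        stop_producing()
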
From b50fe65a22d734cc7c3cb9b60f56be4bc6ba1070 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@jki.re>
Date: Mon, 25 Feb 2019 15:55:21 +0000
Subject: [PATCH 062/278] Add logging when sending error

---
 synapse/replication/tcp/protocol.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py
index 4f4d9915a87..75c9e8355ff 100644
--- a/synapse/replication/tcp/protocol.py
+++ b/synapse/replication/tcp/protocol.py
@@ -242,6 +242,7 @@ def close(self):
     def send_error(self, error_string, *args):
         """Send an error to remote and close the connection.
         """
+        logger.error("[%s] Sending error: %s", self.id(), error_string % args)
         self.send_command(ErrorCommand(error_string % args))
         self.close()
 

From 6087c53830c7f566969621959a2ddad9a120e9f7 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Mon, 25 Feb 2019 17:00:18 +0000
Subject: [PATCH 063/278] Add more debug for membership syncing issues (#4719)

---
 changelog.d/4719.misc     |  1 +
 synapse/handlers/sync.py  | 49 ++++++++++++++++++++++++++++++++-------
 synapse/storage/stream.py | 15 ++++++++++++
 3 files changed, 56 insertions(+), 9 deletions(-)
 create mode 100644 changelog.d/4719.misc

diff --git a/changelog.d/4719.misc b/changelog.d/4719.misc
new file mode 100644
index 00000000000..8bc536ab669
--- /dev/null
+++ b/changelog.d/4719.misc
@@ -0,0 +1 @@
+Add more debug for membership syncing issues.
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 24fc3850ff5..b4c4d899452 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -965,6 +965,15 @@ def generate_sync_result(self, sync_config, since_token=None, full_state=False):
 
         yield self._generate_sync_entry_for_groups(sync_result_builder)
 
+        # debug for https://github.com/matrix-org/synapse/issues/4422
+        for joined_room in sync_result_builder.joined:
+            room_id = joined_room.room_id
+            if room_id in newly_joined_rooms:
+                logger.info(
+                    "Sync result for newly joined room %s: %r",
+                    room_id, joined_room,
+                )
+
         defer.returnValue(SyncResult(
             presence=sync_result_builder.presence,
             account_data=sync_result_builder.account_data,
@@ -1522,30 +1531,39 @@ def _get_rooms_changed(self, sync_result_builder, ignored_users):
         for room_id in sync_result_builder.joined_room_ids:
             room_entry = room_to_events.get(room_id, None)
 
+            newly_joined = room_id in newly_joined_rooms
             if room_entry:
                 events, start_key = room_entry
 
                 prev_batch_token = now_token.copy_and_replace("room_key", start_key)
 
-                room_entries.append(RoomSyncResultBuilder(
+                entry = RoomSyncResultBuilder(
                     room_id=room_id,
                     rtype="joined",
                     events=events,
-                    newly_joined=room_id in newly_joined_rooms,
+                    newly_joined=newly_joined,
                     full_state=False,
-                    since_token=None if room_id in newly_joined_rooms else since_token,
+                    since_token=None if newly_joined else since_token,
                     upto_token=prev_batch_token,
-                ))
+                )
             else:
-                room_entries.append(RoomSyncResultBuilder(
+                entry = RoomSyncResultBuilder(
                     room_id=room_id,
                     rtype="joined",
                     events=[],
-                    newly_joined=room_id in newly_joined_rooms,
+                    newly_joined=newly_joined,
                     full_state=False,
                     since_token=since_token,
                     upto_token=since_token,
-                ))
+                )
+
+            if newly_joined:
+                # debugging for https://github.com/matrix-org/synapse/issues/4422
+                logger.info(
+                    "RoomSyncResultBuilder events for newly joined room %s: %r",
+                    room_id, entry.events,
+                )
+            room_entries.append(entry)
 
         defer.returnValue((room_entries, invited, newly_joined_rooms, newly_left_rooms))
 
@@ -1666,6 +1684,13 @@ def _generate_room_entry(self, sync_result_builder, ignored_users,
             newly_joined_room=newly_joined,
         )
 
+        if newly_joined:
+            # debug for https://github.com/matrix-org/synapse/issues/4422
+            logger.info(
+                "Timeline events after filtering in newly-joined room %s: %r",
+                room_id, batch,
+            )
+
         # When we join the room (or the client requests full_state), we should
         # send down any existing tags. Usually the user won't have tags in a
         # newly joined room, unless either a) they've joined before or b) the
@@ -1897,7 +1922,12 @@ def _calculate_state(
 
 
 class SyncResultBuilder(object):
-    "Used to help build up a new SyncResult for a user"
+    """Used to help build up a new SyncResult for a user
+
+    Attributes:
+        joined (list[JoinedSyncResult]):
+        archived (list[ArchivedSyncResult]):
+    """
     def __init__(self, sync_config, full_state, since_token, now_token,
                  joined_room_ids):
         """
@@ -1906,6 +1936,7 @@ def __init__(self, sync_config, full_state, since_token, now_token,
             full_state(bool): The full_state flag as specified by user
             since_token(StreamToken): The token supplied by user, or None.
             now_token(StreamToken): The token to sync up to.
+
         """
         self.sync_config = sync_config
         self.full_state = full_state
@@ -1933,7 +1964,7 @@ def __init__(self, room_id, rtype, events, newly_joined, full_state,
         Args:
             room_id(str)
             rtype(str): One of `"joined"` or `"archived"`
-            events(list): List of events to include in the room, (more events
+            events(list[FrozenEvent]): List of events to include in the room (more events
                 may be added when generating result).
             newly_joined(bool): If the user has newly joined the room
             full_state(bool): Whether the full state should be sent in result
diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py
index d6cfdba5196..b5aa849f4c3 100644
--- a/synapse/storage/stream.py
+++ b/synapse/storage/stream.py
@@ -191,6 +191,21 @@ def get_room_min_stream_ordering(self):
     @defer.inlineCallbacks
     def get_room_events_stream_for_rooms(self, room_ids, from_key, to_key, limit=0,
                                          order='DESC'):
+        """
+
+        Args:
+            room_ids:
+            from_key:
+            to_key:
+            limit:
+            order:
+
+        Returns:
+            Deferred[dict[str,tuple[list[FrozenEvent], str]]]
+                A map from room id to a tuple containing:
+                    - list of recent events in the room
+                    - stream ordering key for the start of the chunk of events returned.
+        """
         from_id = RoomStreamToken.parse_stream_token(from_key).stream
 
         room_ids = yield self._events_stream_cache.get_entities_changed(

From bf3f8b8855c0723a65bdea61495b45d0fed24992 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Thu, 28 Feb 2019 17:46:22 +0000
Subject: [PATCH 064/278] Add more debug for #4422 (#4769)

---
 changelog.d/4769.misc    |  1 +
 synapse/handlers/sync.py | 11 +++++++++++
 2 files changed, 12 insertions(+)
 create mode 100644 changelog.d/4769.misc

diff --git a/changelog.d/4769.misc b/changelog.d/4769.misc
new file mode 100644
index 00000000000..89144b5425e
--- /dev/null
+++ b/changelog.d/4769.misc
@@ -0,0 +1 @@
+Add more debug for #4422.
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index b4c4d899452..0e504f945dd 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -1437,6 +1437,17 @@ def _get_rooms_changed(self, sync_result_builder, ignored_users):
                     old_mem_ev = yield self.store.get_event(
                         old_mem_ev_id, allow_none=True
                     )
+
+                # debug for #4422
+                if has_join:
+                    prev_membership = None
+                    if old_mem_ev:
+                        prev_membership = old_mem_ev.membership
+                    logger.info(
+                        "Previous membership for room %s with join: %s (event %s)",
+                        room_id, prev_membership, old_mem_ev_id,
+                    )
+
                 if not old_mem_ev or old_mem_ev.membership != Membership.JOIN:
                     newly_joined_rooms.append(room_id)
 

From ed12338f35c7d865171df5be5bc656d8e4bc8278 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Mon, 4 Mar 2019 14:00:03 +0000
Subject: [PATCH 065/278] Remove #4733 debug (#4767)

We don't need any of this stuff now; this brings protocol.py back into line
with develop for the hotfixes branch.
---
 changelog.d/4734.misc               | 1 -
 synapse/replication/tcp/protocol.py | 4 ----
 2 files changed, 5 deletions(-)
 delete mode 100644 changelog.d/4734.misc

diff --git a/changelog.d/4734.misc b/changelog.d/4734.misc
deleted file mode 100644
index f4e3aeb44f3..00000000000
--- a/changelog.d/4734.misc
+++ /dev/null
@@ -1 +0,0 @@
-Add some debug to help with #4733.
diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py
index 530bd3756c5..429471c3458 100644
--- a/synapse/replication/tcp/protocol.py
+++ b/synapse/replication/tcp/protocol.py
@@ -52,7 +52,6 @@
 import fcntl
 import logging
 import struct
-import traceback
 from collections import defaultdict
 
 from six import iteritems, iterkeys
@@ -242,7 +241,6 @@ def close(self):
     def send_error(self, error_string, *args):
         """Send an error to remote and close the connection.
         """
-        logger.error("[%s] Sending error: %s", self.id(), error_string % args)
         self.send_command(ErrorCommand(error_string % args))
         self.close()
 
@@ -335,8 +333,6 @@ def stopProducing(self):
         we or the remote has closed the connection)
         """
         logger.info("[%s] Stop producing", self.id())
-        # debug for #4733
-        logger.info("Traceback: %s", "".join(traceback.format_stack()))
         self.on_connection_closed()
 
     def connectionLost(self, reason):

From a6e2546980fd3c80f31f391b0eefb48c497250c4 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 5 Mar 2019 14:50:37 +0000
Subject: [PATCH 066/278] Fix outbound federation

---
 synapse/federation/transaction_queue.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py
index cf1759f4224..549bc944a6d 100644
--- a/synapse/federation/transaction_queue.py
+++ b/synapse/federation/transaction_queue.py
@@ -406,6 +406,8 @@ def send_edu(self, edu, key):
         else:
             self.pending_edus_by_dest.setdefault(edu.destination, []).append(edu)
 
+        destination = edu.destination
+
         if destination not in self.edu_tx_time_by_dest:
             txtime = self.clock.time() + EDU_BATCH_TIME * 1000
             self.edu_tx_time_by_dest[destination] = txtime

From c7285607a3652b814c0274025fc8521618d27590 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Wed, 6 Mar 2019 11:04:53 +0000
Subject: [PATCH 067/278] Revert EDU-batching hacks from matrix-org-hotfixes

Firstly: we want to do this in a better way; the hack was there to cope
with too many RRs (read receipts), which means we will need to make it
happen again in some form.

This reverts commits: 8d7c0264b 000d23090 eb0334b07 4d07dc0d1
---
 synapse/federation/transaction_queue.py | 55 +++++--------------------
 1 file changed, 10 insertions(+), 45 deletions(-)

diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py
index 549bc944a6d..698d4b4f87c 100644
--- a/synapse/federation/transaction_queue.py
+++ b/synapse/federation/transaction_queue.py
@@ -66,9 +66,6 @@
     ["type"],
 )
 
-# number of seconds to wait to batch up outgoing EDUs
-EDU_BATCH_TIME = 5.0
-
 
 class TransactionQueue(object):
     """This class makes sure we only have one transaction in flight at
@@ -122,12 +119,6 @@ def __init__(self, hs):
         # Map of destination -> (edu_type, key) -> Edu
         self.pending_edus_keyed_by_dest = edus_keyed = {}
 
-        # In order to batch outgoing EDUs, we delay sending them. This records the time
-        # when we should send the next batch, by destination.
-        self.edu_tx_time_by_dest = {}
-
-        self.edu_tx_task_by_dest = {}
-
         LaterGauge(
             "synapse_federation_transaction_queue_pending_pdus",
             "",
@@ -408,21 +399,7 @@ def send_edu(self, edu, key):
 
         destination = edu.destination
 
-        if destination not in self.edu_tx_time_by_dest:
-            txtime = self.clock.time() + EDU_BATCH_TIME * 1000
-            self.edu_tx_time_by_dest[destination] = txtime
-
-        if destination in self.edu_tx_task_by_dest:
-            # we already have a job queued to send EDUs to this destination
-            return
-
-        def send_edus():
-            del self.edu_tx_task_by_dest[destination]
-            self._attempt_new_transaction(destination)
-
-        self.edu_tx_task_by_dest[destination] = self.clock.call_later(
-            EDU_BATCH_TIME, send_edus,
-        )
+        self._attempt_new_transaction(destination)
 
     def send_device_messages(self, destination):
         if destination == self.server_name:
@@ -447,7 +424,6 @@ def _attempt_new_transaction(self, destination):
         Returns:
             None
         """
-
         # list of (pending_pdu, deferred, order)
         if destination in self.pending_transactions:
             # XXX: pending_transactions can get stuck on by a never-ending
@@ -501,30 +477,19 @@ def _transaction_transmission_loop(self, destination):
                 if leftover_pdus:
                     self.pending_pdus_by_dest[destination] = leftover_pdus
 
-                # if we have PDUs to send, we may as well send EDUs too. Otherwise,
-                # we only send EDUs if their delay is up
-                if destination in self.edu_tx_time_by_dest and (
-                    pending_pdus or
-                    self.clock.time() > self.edu_tx_time_by_dest[destination]
-                ):
-                    del self.edu_tx_time_by_dest[destination]
-
-                    pending_edus = self.pending_edus_by_dest.pop(destination, [])
-
-                    # We can only include at most 100 EDUs per transactions
-                    pending_edus, leftover_edus = pending_edus[:100], pending_edus[100:]
-                    if leftover_edus:
-                        self.edu_tx_time_by_dest[destination] = self.clock.time()
-                        self.pending_edus_by_dest[destination] = leftover_edus
+                pending_edus = self.pending_edus_by_dest.pop(destination, [])
 
-                    pending_edus.extend(
-                        self.pending_edus_keyed_by_dest.pop(destination, {}).values()
-                    )
-                else:
-                    pending_edus = []
+                # We can only include at most 100 EDUs per transactions
+                pending_edus, leftover_edus = pending_edus[:100], pending_edus[100:]
+                if leftover_edus:
+                    self.pending_edus_by_dest[destination] = leftover_edus
 
                 pending_presence = self.pending_presence_by_dest.pop(destination, {})
 
+                pending_edus.extend(
+                    self.pending_edus_keyed_by_dest.pop(destination, {}).values()
+                )
+
                 pending_edus.extend(device_message_edus)
                 if pending_presence:
                     pending_edus.append(

From 9e9572c79e5c7dcc0275d2347e69c68cfd6e61f8 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Wed, 6 Mar 2019 11:56:03 +0000
Subject: [PATCH 068/278] Run `black` on synapse/handlers/user_directory.py
 (#4812)

This got done on the develop branch in #4635, but the subsequent merge to
hotfixes (88af0317a) discarded the changes for some reason.

Fixing this here and now means (a) there are fewer differences between
matrix-org-hotfixes and develop, making future patches easier to merge, and (b)
fixes some pep8 errors on the hotfixes branch which have been annoying me for
some time.
---
 synapse/handlers/user_directory.py | 90 ++++++++++++++++--------------
 1 file changed, 47 insertions(+), 43 deletions(-)

diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py
index f5c3ba23a61..c2c3cd7f48a 100644
--- a/synapse/handlers/user_directory.py
+++ b/synapse/handlers/user_directory.py
@@ -14,7 +14,6 @@
 # limitations under the License.
 
 import logging
-import synapse.metrics
 
 from six import iteritems
 
@@ -29,6 +28,7 @@
 
 logger = logging.getLogger(__name__)
 
+
 class UserDirectoryHandler(object):
     """Handles querying of and keeping updated the user_directory.
 
@@ -130,7 +130,7 @@ def handle_local_profile_change(self, user_id, profile):
         # Support users are for diagnostics and should not appear in the user directory.
         if not is_support:
             yield self.store.update_profile_in_user_dir(
-                user_id, profile.display_name, profile.avatar_url, None,
+                user_id, profile.display_name, profile.avatar_url, None
             )
 
     @defer.inlineCallbacks
@@ -166,8 +166,9 @@ def _unsafe_process(self):
                 self.pos = deltas[-1]["stream_id"]
 
                 # Expose current event processing position to prometheus
-                synapse.metrics.event_processing_positions.labels(
-                    "user_dir").set(self.pos)
+                synapse.metrics.event_processing_positions.labels("user_dir").set(
+                    self.pos
+                )
 
                 yield self.store.update_user_directory_stream_pos(self.pos)
 
@@ -191,21 +192,25 @@ def _do_initial_spam(self):
             logger.info("Handling room %d/%d", num_processed_rooms + 1, len(room_ids))
             yield self._handle_initial_room(room_id)
             num_processed_rooms += 1
-            yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.)
+            yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0)
 
         logger.info("Processed all rooms.")
 
         if self.search_all_users:
             num_processed_users = 0
             user_ids = yield self.store.get_all_local_users()
-            logger.info("Doing initial update of user directory. %d users", len(user_ids))
+            logger.info(
+                "Doing initial update of user directory. %d users", len(user_ids)
+            )
             for user_id in user_ids:
                 # We add profiles for all users even if they don't match the
                 # include pattern, just in case we want to change it in future
-                logger.info("Handling user %d/%d", num_processed_users + 1, len(user_ids))
+                logger.info(
+                    "Handling user %d/%d", num_processed_users + 1, len(user_ids)
+                )
                 yield self._handle_local_user(user_id)
                 num_processed_users += 1
-                yield self.clock.sleep(self.INITIAL_USER_SLEEP_MS / 1000.)
+                yield self.clock.sleep(self.INITIAL_USER_SLEEP_MS / 1000.0)
 
             logger.info("Processed all users")
 
@@ -224,24 +229,24 @@ def _handle_initial_room(self, room_id):
         if not is_in_room:
             return
 
-        is_public = yield self.store.is_room_world_readable_or_publicly_joinable(room_id)
+        is_public = yield self.store.is_room_world_readable_or_publicly_joinable(
+            room_id
+        )
 
         users_with_profile = yield self.state.get_current_user_in_room(room_id)
         user_ids = set(users_with_profile)
         unhandled_users = user_ids - self.initially_handled_users
 
         yield self.store.add_profiles_to_user_dir(
-            room_id, {
-                user_id: users_with_profile[user_id] for user_id in unhandled_users
-            }
+            room_id,
+            {user_id: users_with_profile[user_id] for user_id in unhandled_users},
         )
 
         self.initially_handled_users |= unhandled_users
 
         if is_public:
             yield self.store.add_users_to_public_room(
-                room_id,
-                user_ids=user_ids - self.initially_handled_users_in_public
+                room_id, user_ids=user_ids - self.initially_handled_users_in_public
             )
             self.initially_handled_users_in_public |= user_ids
 
@@ -253,7 +258,7 @@ def _handle_initial_room(self, room_id):
         count = 0
         for user_id in user_ids:
             if count % self.INITIAL_ROOM_SLEEP_COUNT == 0:
-                yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.)
+                yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0)
 
             if not self.is_mine_id(user_id):
                 count += 1
@@ -268,7 +273,7 @@ def _handle_initial_room(self, room_id):
                     continue
 
                 if count % self.INITIAL_ROOM_SLEEP_COUNT == 0:
-                    yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.)
+                    yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0)
                 count += 1
 
                 user_set = (user_id, other_user_id)
@@ -290,25 +295,23 @@ def _handle_initial_room(self, room_id):
 
                 if len(to_insert) > self.INITIAL_ROOM_BATCH_SIZE:
                     yield self.store.add_users_who_share_room(
-                        room_id, not is_public, to_insert,
+                        room_id, not is_public, to_insert
                     )
                     to_insert.clear()
 
                 if len(to_update) > self.INITIAL_ROOM_BATCH_SIZE:
                     yield self.store.update_users_who_share_room(
-                        room_id, not is_public, to_update,
+                        room_id, not is_public, to_update
                     )
                     to_update.clear()
 
         if to_insert:
-            yield self.store.add_users_who_share_room(
-                room_id, not is_public, to_insert,
-            )
+            yield self.store.add_users_who_share_room(room_id, not is_public, to_insert)
             to_insert.clear()
 
         if to_update:
             yield self.store.update_users_who_share_room(
-                room_id, not is_public, to_update,
+                room_id, not is_public, to_update
             )
             to_update.clear()
 
@@ -329,11 +332,12 @@ def _handle_deltas(self, deltas):
             # may have become public or not and add/remove the users in said room
             if typ in (EventTypes.RoomHistoryVisibility, EventTypes.JoinRules):
                 yield self._handle_room_publicity_change(
-                    room_id, prev_event_id, event_id, typ,
+                    room_id, prev_event_id, event_id, typ
                 )
             elif typ == EventTypes.Member:
                 change = yield self._get_key_change(
-                    prev_event_id, event_id,
+                    prev_event_id,
+                    event_id,
                     key_name="membership",
                     public_value=Membership.JOIN,
                 )
@@ -343,7 +347,7 @@ def _handle_deltas(self, deltas):
                     if change is None:
                         # Handle any profile changes
                         yield self._handle_profile_change(
-                            state_key, room_id, prev_event_id, event_id,
+                            state_key, room_id, prev_event_id, event_id
                         )
                         continue
 
@@ -375,13 +379,15 @@ def _handle_room_publicity_change(self, room_id, prev_event_id, event_id, typ):
 
         if typ == EventTypes.RoomHistoryVisibility:
             change = yield self._get_key_change(
-                prev_event_id, event_id,
+                prev_event_id,
+                event_id,
                 key_name="history_visibility",
                 public_value="world_readable",
             )
         elif typ == EventTypes.JoinRules:
             change = yield self._get_key_change(
-                prev_event_id, event_id,
+                prev_event_id,
+                event_id,
                 key_name="join_rule",
                 public_value=JoinRules.PUBLIC,
             )
@@ -506,7 +512,7 @@ def _handle_new_user(self, room_id, user_id, profile):
             )
             if self.is_mine_id(other_user_id) and not is_appservice:
                 shared_is_private = yield self.store.get_if_users_share_a_room(
-                    other_user_id, user_id,
+                    other_user_id, user_id
                 )
                 if shared_is_private is True:
                     # We've already marked in the database they share a private room
@@ -521,13 +527,11 @@ def _handle_new_user(self, room_id, user_id, profile):
                     to_insert.add((other_user_id, user_id))
 
         if to_insert:
-            yield self.store.add_users_who_share_room(
-                room_id, not is_public, to_insert,
-            )
+            yield self.store.add_users_who_share_room(room_id, not is_public, to_insert)
 
         if to_update:
             yield self.store.update_users_who_share_room(
-                room_id, not is_public, to_update,
+                room_id, not is_public, to_update
             )
 
     @defer.inlineCallbacks
@@ -546,15 +550,15 @@ def _handle_remove_user(self, room_id, user_id):
         row = yield self.store.get_user_in_public_room(user_id)
         update_user_in_public = row and row["room_id"] == room_id
 
-        if (update_user_in_public or update_user_dir):
+        if update_user_in_public or update_user_dir:
             # XXX: Make this faster?
             rooms = yield self.store.get_rooms_for_user(user_id)
             for j_room_id in rooms:
-                if (not update_user_in_public and not update_user_dir):
+                if not update_user_in_public and not update_user_dir:
                     break
 
                 is_in_room = yield self.store.is_host_joined(
-                    j_room_id, self.server_name,
+                    j_room_id, self.server_name
                 )
 
                 if not is_in_room:
@@ -582,19 +586,19 @@ def _handle_remove_user(self, room_id, user_id):
         # Get a list of user tuples that were in the DB due to this room and
         # users (this includes tuples where the other user matches `user_id`)
         user_tuples = yield self.store.get_users_in_share_dir_with_room_id(
-            user_id, room_id,
+            user_id, room_id
         )
 
         for user_id, other_user_id in user_tuples:
             # For each user tuple get a list of rooms that they still share,
             # trying to find a private room, and update the entry in the DB
-            rooms = yield self.store.get_rooms_in_common_for_users(user_id, other_user_id)
+            rooms = yield self.store.get_rooms_in_common_for_users(
+                user_id, other_user_id
+            )
 
             # If they dont share a room anymore, remove the mapping
             if not rooms:
-                yield self.store.remove_user_who_share_room(
-                    user_id, other_user_id,
-                )
+                yield self.store.remove_user_who_share_room(user_id, other_user_id)
                 continue
 
             found_public_share = None
@@ -608,13 +612,13 @@ def _handle_remove_user(self, room_id, user_id):
                 else:
                     found_public_share = None
                     yield self.store.update_users_who_share_room(
-                        room_id, not is_public, [(user_id, other_user_id)],
+                        room_id, not is_public, [(user_id, other_user_id)]
                     )
                     break
 
             if found_public_share:
                 yield self.store.update_users_who_share_room(
-                    room_id, not is_public, [(user_id, other_user_id)],
+                    room_id, not is_public, [(user_id, other_user_id)]
                 )
 
     @defer.inlineCallbacks
@@ -642,7 +646,7 @@ def _handle_profile_change(self, user_id, room_id, prev_event_id, event_id):
 
         if prev_name != new_name or prev_avatar != new_avatar:
             yield self.store.update_profile_in_user_dir(
-                user_id, new_name, new_avatar, room_id,
+                user_id, new_name, new_avatar, room_id
             )
 
     @defer.inlineCallbacks

From e9aa40199417e8384a25abdc172afc47ceb8d07e Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Wed, 6 Mar 2019 13:21:32 +0000
Subject: [PATCH 069/278] Remove redundant changes from
 synapse/replication/tcp/streams.py (#4813)

This was some hacky code (introduced in c10c71e70d) to make the presence stream
do nothing on hotfixes. We now ensure that no replication clients subscribe to
the presence stream, so this is redundant.
---
 synapse/replication/tcp/streams.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/synapse/replication/tcp/streams.py b/synapse/replication/tcp/streams.py
index d49973634e6..728746bd129 100644
--- a/synapse/replication/tcp/streams.py
+++ b/synapse/replication/tcp/streams.py
@@ -265,8 +265,8 @@ def __init__(self, hs):
         store = hs.get_datastore()
         presence_handler = hs.get_presence_handler()
 
-        self.current_token = lambda: 0
-        self.update_function = lambda _a, _b: []
+        self.current_token = store.get_current_presence_token
+        self.update_function = presence_handler.get_all_presence_updates
 
         super(PresenceStream, self).__init__(hs)
 

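For context: a replication stream of this era exposes a current_token callable and an update_function that returns the rows between two tokens; the earlier hotfix stubbed both out so the presence stream always reported token 0 and no rows. A rough sketch of the two wirings (illustrative names, and the update_function signature is assumed rather than taken from streams.py):

    class SketchPresenceStream(object):
        """Illustrative only; the real class lives in synapse/replication/tcp/streams.py."""

        def __init__(self, store, presence_handler, disabled=False):
            if disabled:
                # hotfix behaviour: replication clients never see presence updates
                self.current_token = lambda: 0
                self.update_function = lambda _from_token, _to_token: []
            else:
                # normal behaviour: delegate to the store / presence handler
                self.current_token = store.get_current_presence_token
                self.update_function = presence_handler.get_all_presence_updates
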
From 27dbc9ac423cbd4db04c9f78e56a6d28332fb5ec Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 6 Mar 2019 17:12:45 +0000
Subject: [PATCH 070/278] Reenable presence tests and remove pointless change

---
 synapse/federation/transaction_queue.py |  4 +---
 tests/rest/client/v1/test_rooms.py      | 12 +++++-------
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py
index 698d4b4f87c..e5e42c647d5 100644
--- a/synapse/federation/transaction_queue.py
+++ b/synapse/federation/transaction_queue.py
@@ -397,9 +397,7 @@ def send_edu(self, edu, key):
         else:
             self.pending_edus_by_dest.setdefault(edu.destination, []).append(edu)
 
-        destination = edu.destination
-
-        self._attempt_new_transaction(destination)
+        self._attempt_new_transaction(edu.destination)
 
     def send_device_messages(self, destination):
         if destination == self.server_name:
diff --git a/tests/rest/client/v1/test_rooms.py b/tests/rest/client/v1/test_rooms.py
index 63d4b5eb00f..015c1442485 100644
--- a/tests/rest/client/v1/test_rooms.py
+++ b/tests/rest/client/v1/test_rooms.py
@@ -761,13 +761,11 @@ def test_initial_sync(self):
 
         self.assertTrue("presence" in channel.json_body)
 
-        # presence is turned off on hotfixes
-
-        # presence_by_user = {
-        #     e["content"]["user_id"]: e for e in channel.json_body["presence"]
-        # }
-        # self.assertTrue(self.user_id in presence_by_user)
-        # self.assertEquals("m.presence", presence_by_user[self.user_id]["type"])
+        presence_by_user = {
+            e["content"]["user_id"]: e for e in channel.json_body["presence"]
+        }
+        self.assertTrue(self.user_id in presence_by_user)
+        self.assertEquals("m.presence", presence_by_user[self.user_id]["type"])
 
 
 class RoomMessageListTestCase(RoomBase):

From ed8ccc37377f5ffa0d7d7365747c8897aea6a489 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Wed, 13 Mar 2019 14:42:11 +0000
Subject: [PATCH 071/278] Reinstate EDU-batching hacks

This reverts commit c7285607a3652b814c0274025fc8521618d27590.
---
 synapse/federation/transaction_queue.py | 57 ++++++++++++++++++++-----
 1 file changed, 47 insertions(+), 10 deletions(-)

diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py
index e5e42c647d5..549bc944a6d 100644
--- a/synapse/federation/transaction_queue.py
+++ b/synapse/federation/transaction_queue.py
@@ -66,6 +66,9 @@
     ["type"],
 )
 
+# number of seconds to wait to batch up outgoing EDUs
+EDU_BATCH_TIME = 5.0
+
 
 class TransactionQueue(object):
     """This class makes sure we only have one transaction in flight at
@@ -119,6 +122,12 @@ def __init__(self, hs):
         # Map of destination -> (edu_type, key) -> Edu
         self.pending_edus_keyed_by_dest = edus_keyed = {}
 
+        # In order to batch outgoing EDUs, we delay sending them. This records the time
+        # when we should send the next batch, by destination.
+        self.edu_tx_time_by_dest = {}
+
+        self.edu_tx_task_by_dest = {}
+
         LaterGauge(
             "synapse_federation_transaction_queue_pending_pdus",
             "",
@@ -397,7 +406,23 @@ def send_edu(self, edu, key):
         else:
             self.pending_edus_by_dest.setdefault(edu.destination, []).append(edu)
 
-        self._attempt_new_transaction(edu.destination)
+        destination = edu.destination
+
+        if destination not in self.edu_tx_time_by_dest:
+            txtime = self.clock.time() + EDU_BATCH_TIME * 1000
+            self.edu_tx_time_by_dest[destination] = txtime
+
+        if destination in self.edu_tx_task_by_dest:
+            # we already have a job queued to send EDUs to this destination
+            return
+
+        def send_edus():
+            del self.edu_tx_task_by_dest[destination]
+            self._attempt_new_transaction(destination)
+
+        self.edu_tx_task_by_dest[destination] = self.clock.call_later(
+            EDU_BATCH_TIME, send_edus,
+        )
 
     def send_device_messages(self, destination):
         if destination == self.server_name:
@@ -422,6 +447,7 @@ def _attempt_new_transaction(self, destination):
         Returns:
             None
         """
+
         # list of (pending_pdu, deferred, order)
         if destination in self.pending_transactions:
             # XXX: pending_transactions can get stuck on by a never-ending
@@ -475,18 +501,29 @@ def _transaction_transmission_loop(self, destination):
                 if leftover_pdus:
                     self.pending_pdus_by_dest[destination] = leftover_pdus
 
-                pending_edus = self.pending_edus_by_dest.pop(destination, [])
+                # if we have PDUs to send, we may as well send EDUs too. Otherwise,
+                # we only send EDUs if their delay is up
+                if destination in self.edu_tx_time_by_dest and (
+                    pending_pdus or
+                    self.clock.time() > self.edu_tx_time_by_dest[destination]
+                ):
+                    del self.edu_tx_time_by_dest[destination]
 
-                # We can only include at most 100 EDUs per transactions
-                pending_edus, leftover_edus = pending_edus[:100], pending_edus[100:]
-                if leftover_edus:
-                    self.pending_edus_by_dest[destination] = leftover_edus
+                    pending_edus = self.pending_edus_by_dest.pop(destination, [])
 
-                pending_presence = self.pending_presence_by_dest.pop(destination, {})
+                    # We can only include at most 100 EDUs per transactions
+                    pending_edus, leftover_edus = pending_edus[:100], pending_edus[100:]
+                    if leftover_edus:
+                        self.edu_tx_time_by_dest[destination] = self.clock.time()
+                        self.pending_edus_by_dest[destination] = leftover_edus
 
-                pending_edus.extend(
-                    self.pending_edus_keyed_by_dest.pop(destination, {}).values()
-                )
+                    pending_edus.extend(
+                        self.pending_edus_keyed_by_dest.pop(destination, {}).values()
+                    )
+                else:
+                    pending_edus = []
+
+                pending_presence = self.pending_presence_by_dest.pop(destination, {})
 
                 pending_edus.extend(device_message_edus)
                 if pending_presence:

From 73c6630718de6950b723c18e25eb7c316f08b608 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Tue, 19 Mar 2019 12:17:28 +0000
Subject: [PATCH 072/278] Revert "Reinstate EDU-batching hacks"

This reverts commit ed8ccc37377f5ffa0d7d7365747c8897aea6a489.
---
 synapse/federation/transaction_queue.py | 57 +++++--------------------
 1 file changed, 10 insertions(+), 47 deletions(-)

diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py
index 549bc944a6d..e5e42c647d5 100644
--- a/synapse/federation/transaction_queue.py
+++ b/synapse/federation/transaction_queue.py
@@ -66,9 +66,6 @@
     ["type"],
 )
 
-# number of seconds to wait to batch up outgoing EDUs
-EDU_BATCH_TIME = 5.0
-
 
 class TransactionQueue(object):
     """This class makes sure we only have one transaction in flight at
@@ -122,12 +119,6 @@ def __init__(self, hs):
         # Map of destination -> (edu_type, key) -> Edu
         self.pending_edus_keyed_by_dest = edus_keyed = {}
 
-        # In order to batch outgoing EDUs, we delay sending them. This records the time
-        # when we should send the next batch, by destination.
-        self.edu_tx_time_by_dest = {}
-
-        self.edu_tx_task_by_dest = {}
-
         LaterGauge(
             "synapse_federation_transaction_queue_pending_pdus",
             "",
@@ -406,23 +397,7 @@ def send_edu(self, edu, key):
         else:
             self.pending_edus_by_dest.setdefault(edu.destination, []).append(edu)
 
-        destination = edu.destination
-
-        if destination not in self.edu_tx_time_by_dest:
-            txtime = self.clock.time() + EDU_BATCH_TIME * 1000
-            self.edu_tx_time_by_dest[destination] = txtime
-
-        if destination in self.edu_tx_task_by_dest:
-            # we already have a job queued to send EDUs to this destination
-            return
-
-        def send_edus():
-            del self.edu_tx_task_by_dest[destination]
-            self._attempt_new_transaction(destination)
-
-        self.edu_tx_task_by_dest[destination] = self.clock.call_later(
-            EDU_BATCH_TIME, send_edus,
-        )
+        self._attempt_new_transaction(edu.destination)
 
     def send_device_messages(self, destination):
         if destination == self.server_name:
@@ -447,7 +422,6 @@ def _attempt_new_transaction(self, destination):
         Returns:
             None
         """
-
         # list of (pending_pdu, deferred, order)
         if destination in self.pending_transactions:
             # XXX: pending_transactions can get stuck on by a never-ending
@@ -501,30 +475,19 @@ def _transaction_transmission_loop(self, destination):
                 if leftover_pdus:
                     self.pending_pdus_by_dest[destination] = leftover_pdus
 
-                # if we have PDUs to send, we may as well send EDUs too. Otherwise,
-                # we only send EDUs if their delay is up
-                if destination in self.edu_tx_time_by_dest and (
-                    pending_pdus or
-                    self.clock.time() > self.edu_tx_time_by_dest[destination]
-                ):
-                    del self.edu_tx_time_by_dest[destination]
-
-                    pending_edus = self.pending_edus_by_dest.pop(destination, [])
-
-                    # We can only include at most 100 EDUs per transactions
-                    pending_edus, leftover_edus = pending_edus[:100], pending_edus[100:]
-                    if leftover_edus:
-                        self.edu_tx_time_by_dest[destination] = self.clock.time()
-                        self.pending_edus_by_dest[destination] = leftover_edus
+                pending_edus = self.pending_edus_by_dest.pop(destination, [])
 
-                    pending_edus.extend(
-                        self.pending_edus_keyed_by_dest.pop(destination, {}).values()
-                    )
-                else:
-                    pending_edus = []
+                # We can only include at most 100 EDUs per transactions
+                pending_edus, leftover_edus = pending_edus[:100], pending_edus[100:]
+                if leftover_edus:
+                    self.pending_edus_by_dest[destination] = leftover_edus
 
                 pending_presence = self.pending_presence_by_dest.pop(destination, {})
 
+                pending_edus.extend(
+                    self.pending_edus_keyed_by_dest.pop(destination, {}).values()
+                )
+
                 pending_edus.extend(device_message_edus)
                 if pending_presence:
                     pending_edus.append(

From 233b61ac61f39b6c107c14dd399a2ac67d1cfc38 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Tue, 2 Apr 2019 13:51:37 +0100
Subject: [PATCH 073/278] Remove spurious changelog files from hotfixes

The relevant patches are now in develop thanks to
https://github.com/matrix-org/synapse/pull/4816.
---
 changelog.d/4719.misc | 1 -
 changelog.d/4769.misc | 1 -
 2 files changed, 2 deletions(-)
 delete mode 100644 changelog.d/4719.misc
 delete mode 100644 changelog.d/4769.misc

diff --git a/changelog.d/4719.misc b/changelog.d/4719.misc
deleted file mode 100644
index 8bc536ab669..00000000000
--- a/changelog.d/4719.misc
+++ /dev/null
@@ -1 +0,0 @@
-Add more debug for membership syncing issues.
diff --git a/changelog.d/4769.misc b/changelog.d/4769.misc
deleted file mode 100644
index 89144b5425e..00000000000
--- a/changelog.d/4769.misc
+++ /dev/null
@@ -1 +0,0 @@
-Add more debug for #4422.

From ec94d6a590e37fb5be4b802579d6e002766e2751 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 15 Apr 2019 19:21:32 +0100
Subject: [PATCH 074/278] VersionRestServlet doesn't take a param

---
 synapse/app/client_reader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py
index 1e9e686107e..864f1eac482 100644
--- a/synapse/app/client_reader.py
+++ b/synapse/app/client_reader.py
@@ -114,7 +114,7 @@ def _listen_http(self, listener_config):
                     KeyChangesServlet(self).register(resource)
                     VoipRestServlet(self).register(resource)
                     PushRuleRestServlet(self).register(resource)
-                    VersionsRestServlet(self).register(resource)
+                    VersionsRestServlet().register(resource)
 
                     resources.update({
                         "/_matrix/client": resource,

From aadba440da3ac124a4ecfb13da0fba23a51efbf8 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 15 Apr 2019 19:23:21 +0100
Subject: [PATCH 075/278] Point pusher to new box

---
 synapse/push/httppusher.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py
index 495ffbc3bfa..6e40863e788 100644
--- a/synapse/push/httppusher.py
+++ b/synapse/push/httppusher.py
@@ -109,7 +109,7 @@ def __init__(self, hs, pusherdict):
         self.url = self.data['url']
         self.url = self.url.replace(
             "https://matrix.org/_matrix/push/v1/notify",
-            "http://http-priv.matrix.org/_matrix/push/v1/notify",
+            "http://10.101.0.14/_matrix/push/v1/notify",
         )
         self.http_client = hs.get_simple_http_client()
         self.data_minus_url = {}

From fc5be50d561dcf0f069c5b4920faa0dfd3a962e2 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Tue, 16 Apr 2019 15:16:57 +0100
Subject: [PATCH 076/278] skip send without trailing slash

---
 synapse/federation/transport/client.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py
index e424c40fdf5..de888cc12f8 100644
--- a/synapse/federation/transport/client.py
+++ b/synapse/federation/transport/client.py
@@ -173,7 +173,7 @@ def send_transaction(self, transaction, json_data_callback=None):
         # generated by the json_data_callback.
         json_data = transaction.get_dict()
 
-        path = _create_v1_path("/send/%s", transaction.transaction_id)
+        path = _create_v1_path("/send/%s/", transaction.transaction_id)
 
         response = yield self.client.put_json(
             transaction.destination,
@@ -182,7 +182,7 @@ def send_transaction(self, transaction, json_data_callback=None):
             json_data_callback=json_data_callback,
             long_retries=True,
             backoff_on_404=True,  # If we get a 404 the other side has gone
-            try_trailing_slash_on_400=True,
+            # try_trailing_slash_on_400=True,
         )
 
         defer.returnValue(response)

From 8699f380f035550ca05d50eadb1fd03fab5ec86b Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Tue, 4 Jun 2019 12:14:41 +0100
Subject: [PATCH 077/278] hotfix RetryLimiter

---
 synapse/util/retryutils.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py
index f6dfa77d8fd..1a77456498a 100644
--- a/synapse/util/retryutils.py
+++ b/synapse/util/retryutils.py
@@ -97,7 +97,12 @@ def get_retry_limiter(destination, clock, store, ignore_backoff=False, **kwargs)
 
     defer.returnValue(
         RetryDestinationLimiter(
-            destination, clock, store, retry_interval, backoff_on_failure, **kwargs
+            destination,
+            clock,
+            store,
+            retry_interval,
+            backoff_on_failure=backoff_on_failure,
+            **kwargs
         )
     )
 

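The point of the hotfix is that passing backoff_on_failure positionally is fragile: if the callee has other optional parameters before it, the flag silently lands in the wrong slot, whereas a keyword argument pins it to the intended parameter. A generic illustration with a made-up signature (the real RetryDestinationLimiter takes more parameters):

    class Limiter(object):
        """Made-up signature, purely to illustrate the positional/keyword pitfall."""

        def __init__(self, destination, clock, store, retry_interval,
                     min_retry_interval=500, backoff_on_failure=True):
            self.min_retry_interval = min_retry_interval
            self.backoff_on_failure = backoff_on_failure

    # positional call: the flag silently becomes min_retry_interval
    bad = Limiter("example.org", None, None, 2000, False)
    assert bad.backoff_on_failure is True
    assert bad.min_retry_interval is False

    # keyword call: the flag ends up where it was meant to go
    good = Limiter("example.org", None, None, 2000, backoff_on_failure=False)
    assert good.backoff_on_failure is False
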
From d90b0946ed775ca228895dd9f7e63bd16bed6391 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Tue, 13 Aug 2019 18:05:06 +0100
Subject: [PATCH 078/278] Temporary fix to ensure kde can contact matrix.org if
 stuff breaks

---
 synapse/http/federation/well_known_resolver.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/synapse/http/federation/well_known_resolver.py b/synapse/http/federation/well_known_resolver.py
index d2866ff67da..d4bbf057e25 100644
--- a/synapse/http/federation/well_known_resolver.py
+++ b/synapse/http/federation/well_known_resolver.py
@@ -79,6 +79,10 @@ def get_well_known(self, server_name):
         Returns:
             Deferred[WellKnownLookupResult]: The result of the lookup
         """
+
+        if server_name == b"kde.org":
+            return WellKnownLookupResult(delegated_server=b"kde.modular.im:443")
+
         try:
             result = self._well_known_cache[server_name]
         except KeyError:

From 1ceeccb76987cfcc833498a07f866b461f451467 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <andrew@amorgan.xyz>
Date: Fri, 6 Sep 2019 13:00:34 +0100
Subject: [PATCH 079/278] Move get_threepid_validation_session into
 RegistrationWorkerStore

---
 synapse/storage/registration.py | 151 ++++++++++++++++----------------
 1 file changed, 76 insertions(+), 75 deletions(-)

diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py
index 2d3c7e2dc9c..fae3d92cc6d 100644
--- a/synapse/storage/registration.py
+++ b/synapse/storage/registration.py
@@ -614,6 +614,82 @@ def get_user_deactivated_status(self, user_id):
         # Convert the integer into a boolean.
         return res == 1
 
+    def validate_threepid_session(self, session_id, client_secret, token, current_ts):
+        """Attempt to validate a threepid session using a token
+
+        Args:
+            session_id (str): The id of a validation session
+            client_secret (str): A unique string provided by the client to
+                help identify this validation attempt
+            token (str): A validation token
+            current_ts (int): The current unix time in milliseconds. Used for
+                checking token expiry status
+
+        Returns:
+            deferred str|None: A str representing a link to redirect the user
+            to if there is one.
+        """
+
+        # Insert everything into a transaction in order to run atomically
+        def validate_threepid_session_txn(txn):
+            row = self._simple_select_one_txn(
+                txn,
+                table="threepid_validation_session",
+                keyvalues={"session_id": session_id},
+                retcols=["client_secret", "validated_at"],
+                allow_none=True,
+            )
+
+            if not row:
+                raise ThreepidValidationError(400, "Unknown session_id")
+            retrieved_client_secret = row["client_secret"]
+            validated_at = row["validated_at"]
+
+            if retrieved_client_secret != client_secret:
+                raise ThreepidValidationError(
+                    400, "This client_secret does not match the provided session_id"
+                )
+
+            row = self._simple_select_one_txn(
+                txn,
+                table="threepid_validation_token",
+                keyvalues={"session_id": session_id, "token": token},
+                retcols=["expires", "next_link"],
+                allow_none=True,
+            )
+
+            if not row:
+                raise ThreepidValidationError(
+                    400, "Validation token not found or has expired"
+                )
+            expires = row["expires"]
+            next_link = row["next_link"]
+
+            # If the session is already validated, no need to revalidate
+            if validated_at:
+                return next_link
+
+            if expires <= current_ts:
+                raise ThreepidValidationError(
+                    400, "This token has expired. Please request a new one"
+                )
+
+            # Looks good. Validate the session
+            self._simple_update_txn(
+                txn,
+                table="threepid_validation_session",
+                keyvalues={"session_id": session_id},
+                updatevalues={"validated_at": self.clock.time_msec()},
+            )
+
+            return next_link
+
+        # Return next_link if it exists
+        return self.runInteraction(
+            "validate_threepid_session_txn", validate_threepid_session_txn
+        )
+
+
 
 class RegistrationStore(
     RegistrationWorkerStore, background_updates.BackgroundUpdateStore
@@ -1136,81 +1212,6 @@ def get_threepid_validation_session_txn(txn):
             "get_threepid_validation_session", get_threepid_validation_session_txn
         )
 
-    def validate_threepid_session(self, session_id, client_secret, token, current_ts):
-        """Attempt to validate a threepid session using a token
-
-        Args:
-            session_id (str): The id of a validation session
-            client_secret (str): A unique string provided by the client to
-                help identify this validation attempt
-            token (str): A validation token
-            current_ts (int): The current unix time in milliseconds. Used for
-                checking token expiry status
-
-        Returns:
-            deferred str|None: A str representing a link to redirect the user
-            to if there is one.
-        """
-
-        # Insert everything into a transaction in order to run atomically
-        def validate_threepid_session_txn(txn):
-            row = self._simple_select_one_txn(
-                txn,
-                table="threepid_validation_session",
-                keyvalues={"session_id": session_id},
-                retcols=["client_secret", "validated_at"],
-                allow_none=True,
-            )
-
-            if not row:
-                raise ThreepidValidationError(400, "Unknown session_id")
-            retrieved_client_secret = row["client_secret"]
-            validated_at = row["validated_at"]
-
-            if retrieved_client_secret != client_secret:
-                raise ThreepidValidationError(
-                    400, "This client_secret does not match the provided session_id"
-                )
-
-            row = self._simple_select_one_txn(
-                txn,
-                table="threepid_validation_token",
-                keyvalues={"session_id": session_id, "token": token},
-                retcols=["expires", "next_link"],
-                allow_none=True,
-            )
-
-            if not row:
-                raise ThreepidValidationError(
-                    400, "Validation token not found or has expired"
-                )
-            expires = row["expires"]
-            next_link = row["next_link"]
-
-            # If the session is already validated, no need to revalidate
-            if validated_at:
-                return next_link
-
-            if expires <= current_ts:
-                raise ThreepidValidationError(
-                    400, "This token has expired. Please request a new one"
-                )
-
-            # Looks good. Validate the session
-            self._simple_update_txn(
-                txn,
-                table="threepid_validation_session",
-                keyvalues={"session_id": session_id},
-                updatevalues={"validated_at": self.clock.time_msec()},
-            )
-
-            return next_link
-
-        # Return next_link if it exists
-        return self.runInteraction(
-            "validate_threepid_session_txn", validate_threepid_session_txn
-        )
-
     def upsert_threepid_validation_session(
         self,
         medium,

From e0eef473158d8b60bbea6fb130cc89796fc3e606 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Wed, 11 Sep 2019 11:59:45 +0100
Subject: [PATCH 080/278] Fix existing v2 identity server calls (MSC2140)
 (#6013)

Two things I missed while implementing [MSC2140](https://github.com/matrix-org/matrix-doc/pull/2140/files#diff-c03a26de5ac40fb532de19cb7fc2aaf7R80).

1. Access tokens should be provided to the identity server as `access_token`, not `id_access_token`, even though the homeserver may accept the tokens as `id_access_token`.
2. Access tokens must be sent to the identity server in a query parameter; putting them in the JSON body is not allowed.

We now send the access token as part of an `Authorization: ...` header, which fixes both things.

The breaking code was added in https://github.com/matrix-org/synapse/pull/5892

Sytest PR: https://github.com/matrix-org/sytest/pull/697
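
A minimal sketch of the header handling this patch adds (helper name and values are illustrative; see the diff for the real code):

```python
# Present the identity-server access token as a Bearer Authorization header
# rather than as an id_access_token field in the JSON body.
def make_auth_headers(id_access_token):
    # SimpleHttpClient takes each header value as a list of strings.
    return {"Authorization": ["Bearer %s" % (id_access_token,)]}

bind_data = {"sid": "abc", "client_secret": "secret", "mxid": "@alice:example.org"}
headers = make_auth_headers("tok_123")
# data = yield http_client.post_json_get_json(bind_url, bind_data, headers=headers)
```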
---
 changelog.d/6013.misc        |  1 +
 synapse/handlers/identity.py | 28 ++++++++++++++++++++++++++--
 2 files changed, 27 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/6013.misc

diff --git a/changelog.d/6013.misc b/changelog.d/6013.misc
new file mode 100644
index 00000000000..939fe8c6559
--- /dev/null
+++ b/changelog.d/6013.misc
@@ -0,0 +1 @@
+Compatibility with v2 Identity Service APIs other than /lookup.
\ No newline at end of file
diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py
index f6d1d1717e6..73fe98f2966 100644
--- a/synapse/handlers/identity.py
+++ b/synapse/handlers/identity.py
@@ -74,6 +74,25 @@ def _extract_items_from_creds_dict(self, creds):
         id_access_token = creds.get("id_access_token")
         return client_secret, id_server, id_access_token
 
+    def create_id_access_token_header(self, id_access_token):
+        """Create an Authorization header for passing to SimpleHttpClient as the header value
+        of an HTTP request.
+
+        Args:
+            id_access_token (str): An identity server access token.
+
+        Returns:
+            list[str]: The ascii-encoded bearer token encased in a list.
+        """
+        # Prefix with Bearer
+        bearer_token = "Bearer %s" % id_access_token
+
+        # Encode headers to standard ascii
+        bearer_token.encode("ascii")
+
+        # Return as a list as that's how SimpleHttpClient takes header values
+        return [bearer_token]
+
     @defer.inlineCallbacks
     def threepid_from_creds(self, id_server, creds):
         """
@@ -149,15 +168,20 @@ def bind_threepid(self, creds, mxid, use_v2=True):
             use_v2 = False
 
         # Decide which API endpoint URLs to use
+        headers = {}
         bind_data = {"sid": creds["sid"], "client_secret": client_secret, "mxid": mxid}
         if use_v2:
             bind_url = "https://%s/_matrix/identity/v2/3pid/bind" % (id_server,)
-            bind_data["id_access_token"] = id_access_token
+            headers["Authorization"] = self.create_id_access_token_header(
+                id_access_token
+            )
         else:
             bind_url = "https://%s/_matrix/identity/api/v1/3pid/bind" % (id_server,)
 
         try:
-            data = yield self.http_client.post_json_get_json(bind_url, bind_data)
+            data = yield self.http_client.post_json_get_json(
+                bind_url, bind_data, headers=headers
+            )
             logger.debug("bound threepid %r to %s", creds, mxid)
 
             # Remember where we bound the threepid

From ee91c69ef792c3db7b4438d17e38f343f9c10b72 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 13 Sep 2019 14:44:48 +0100
Subject: [PATCH 081/278] Fix m.federate bug
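
The fix tightens the truthiness check on `m.federate` to a strict boolean. A small sketch of why that matters (room create events carry arbitrary JSON, so the field is not guaranteed to be a real bool):

```python
# Normalise "m.federate" to a strict boolean, as the patch does.
def is_federatable(event_content):
    return event_content.get("m.federate", True) is True

assert is_federatable({}) is True                        # absent: federatable by default
assert is_federatable({"m.federate": False}) is False
assert is_federatable({"m.federate": "false"}) is False  # truthy non-bool no longer counts as True
```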

---
 synapse/handlers/stats.py | 2 +-
 synapse/storage/stats.py  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py
index 921735edb31..4a962d97c9b 100644
--- a/synapse/handlers/stats.py
+++ b/synapse/handlers/stats.py
@@ -260,7 +260,7 @@ def _handle_deltas(self, deltas):
                         room_stats_delta["local_users_in_room"] += delta
 
             elif typ == EventTypes.Create:
-                room_state["is_federatable"] = event_content.get("m.federate", True)
+                room_state["is_federatable"] = event_content.get("m.federate", True) is True
                 if sender and self.is_mine_id(sender):
                     user_to_stats_deltas.setdefault(sender, Counter())[
                         "rooms_created"
diff --git a/synapse/storage/stats.py b/synapse/storage/stats.py
index 6560173c08e..8ba7051086b 100644
--- a/synapse/storage/stats.py
+++ b/synapse/storage/stats.py
@@ -823,7 +823,7 @@ def _fetch_current_state_stats(txn):
             elif event.type == EventTypes.CanonicalAlias:
                 room_state["canonical_alias"] = event.content.get("alias")
             elif event.type == EventTypes.Create:
-                room_state["is_federatable"] = event.content.get("m.federate", True)
+                room_state["is_federatable"] = event.content.get("m.federate", True) is True
 
         yield self.update_room_state(room_id, room_state)
 

From e01026d84d74c78fa6a0411c2597cf33ec8fbde5 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Wed, 18 Sep 2019 13:53:37 +0100
Subject: [PATCH 082/278] Revert "Fix existing v2 identity server calls
 (MSC2140) (#6013)"

This has now been merged into develop (3505ffcda) so we don't need this
cherry-picked commit.

This reverts commit e0eef473158d8b60bbea6fb130cc89796fc3e606.
---
 changelog.d/6013.misc        |  1 -
 synapse/handlers/identity.py | 28 ++--------------------------
 2 files changed, 2 insertions(+), 27 deletions(-)
 delete mode 100644 changelog.d/6013.misc

diff --git a/changelog.d/6013.misc b/changelog.d/6013.misc
deleted file mode 100644
index 939fe8c6559..00000000000
--- a/changelog.d/6013.misc
+++ /dev/null
@@ -1 +0,0 @@
-Compatibility with v2 Identity Service APIs other than /lookup.
\ No newline at end of file
diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py
index 73fe98f2966..f6d1d1717e6 100644
--- a/synapse/handlers/identity.py
+++ b/synapse/handlers/identity.py
@@ -74,25 +74,6 @@ def _extract_items_from_creds_dict(self, creds):
         id_access_token = creds.get("id_access_token")
         return client_secret, id_server, id_access_token
 
-    def create_id_access_token_header(self, id_access_token):
-        """Create an Authorization header for passing to SimpleHttpClient as the header value
-        of an HTTP request.
-
-        Args:
-            id_access_token (str): An identity server access token.
-
-        Returns:
-            list[str]: The ascii-encoded bearer token encased in a list.
-        """
-        # Prefix with Bearer
-        bearer_token = "Bearer %s" % id_access_token
-
-        # Encode headers to standard ascii
-        bearer_token.encode("ascii")
-
-        # Return as a list as that's how SimpleHttpClient takes header values
-        return [bearer_token]
-
     @defer.inlineCallbacks
     def threepid_from_creds(self, id_server, creds):
         """
@@ -168,20 +149,15 @@ def bind_threepid(self, creds, mxid, use_v2=True):
             use_v2 = False
 
         # Decide which API endpoint URLs to use
-        headers = {}
         bind_data = {"sid": creds["sid"], "client_secret": client_secret, "mxid": mxid}
         if use_v2:
             bind_url = "https://%s/_matrix/identity/v2/3pid/bind" % (id_server,)
-            headers["Authorization"] = self.create_id_access_token_header(
-                id_access_token
-            )
+            bind_data["id_access_token"] = id_access_token
         else:
             bind_url = "https://%s/_matrix/identity/api/v1/3pid/bind" % (id_server,)
 
         try:
-            data = yield self.http_client.post_json_get_json(
-                bind_url, bind_data, headers=headers
-            )
+            data = yield self.http_client.post_json_get_json(bind_url, bind_data)
             logger.debug("bound threepid %r to %s", creds, mxid)
 
             # Remember where we bound the threepid

From 05241b30316584cbb341cc8b995b8d6aefb827d8 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Wed, 18 Sep 2019 13:54:57 +0100
Subject: [PATCH 083/278] Revert "Fix m.federate bug"

This has now been merged into develop (142c9325c) so we no longer need this
cherry-picked commit.

This reverts commit ee91c69ef792c3db7b4438d17e38f343f9c10b72.
---
 synapse/handlers/stats.py | 2 +-
 synapse/storage/stats.py  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py
index 4a962d97c9b..921735edb31 100644
--- a/synapse/handlers/stats.py
+++ b/synapse/handlers/stats.py
@@ -260,7 +260,7 @@ def _handle_deltas(self, deltas):
                         room_stats_delta["local_users_in_room"] += delta
 
             elif typ == EventTypes.Create:
-                room_state["is_federatable"] = event_content.get("m.federate", True) is True
+                room_state["is_federatable"] = event_content.get("m.federate", True)
                 if sender and self.is_mine_id(sender):
                     user_to_stats_deltas.setdefault(sender, Counter())[
                         "rooms_created"
diff --git a/synapse/storage/stats.py b/synapse/storage/stats.py
index 8ba7051086b..6560173c08e 100644
--- a/synapse/storage/stats.py
+++ b/synapse/storage/stats.py
@@ -823,7 +823,7 @@ def _fetch_current_state_stats(txn):
             elif event.type == EventTypes.CanonicalAlias:
                 room_state["canonical_alias"] = event.content.get("alias")
             elif event.type == EventTypes.Create:
-                room_state["is_federatable"] = event.content.get("m.federate", True) is True
+                room_state["is_federatable"] = event.content.get("m.federate", True)
 
         yield self.update_room_state(room_id, room_state)
 

From 721086a291b7d187725ab3314dc1a4f11bd00f46 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Fri, 27 Sep 2019 16:00:00 +0100
Subject: [PATCH 084/278] Awful hackery to try to get the fed sender to keep up

Basically, if the federation sender starts getting behind, insert some sleeps
into the transaction transmission code to give the fed sender a chance to catch
up.

Might have to experiment a bit with the numbers.
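
A simplified sketch of the mechanism (plain functions rather than the real Twisted classes): the event-processing loop raises a shared flag once it has been running for over a minute, and each per-destination transmission loop sleeps while the flag is set, so events batch into fewer, larger transactions.

```python
import time

class TransactionManager:
    def __init__(self):
        # Set by the federation sender when the event queue falls behind.
        self.deprioritise_transmission = False

def maybe_deprioritise(tm, loop_start_ms, now_ms, threshold_ms=60 * 1000):
    # Called from the event-processing loop on each iteration.
    if now_ms - loop_start_ms > threshold_ms and not tm.deprioritise_transmission:
        tm.deprioritise_transmission = True

def transmission_loop_step(tm, sleep_secs=2):
    # Called before building each transaction for a destination.
    if tm.deprioritise_transmission:
        time.sleep(sleep_secs)  # the real code yields on clock.sleep(2)
    # ... build and send the transaction ...
```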
---
 changelog.d/6126.feature                       |  1 +
 synapse/federation/sender/__init__.py          | 18 ++++++++++++++++++
 .../federation/sender/per_destination_queue.py |  5 +++++
 .../federation/sender/transaction_manager.py   |  4 ++++
 4 files changed, 28 insertions(+)
 create mode 100644 changelog.d/6126.feature

diff --git a/changelog.d/6126.feature b/changelog.d/6126.feature
new file mode 100644
index 00000000000..1207ba6206a
--- /dev/null
+++ b/changelog.d/6126.feature
@@ -0,0 +1 @@
+Group events into larger federation transactions at times of high traffic.
diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py
index d46f4aaeb1a..497485fac22 100644
--- a/synapse/federation/sender/__init__.py
+++ b/synapse/federation/sender/__init__.py
@@ -152,9 +152,24 @@ def notify_new_events(self, current_id):
 
     @defer.inlineCallbacks
     def _process_event_queue_loop(self):
+        loop_start_time = self.clock.time_msec()
         try:
             self._is_processing = True
             while True:
+                # if we've been going around this loop for a long time without
+                # catching up, deprioritise transaction transmission. This should mean
+                # that events get batched into fewer transactions, which is more
+                # efficient, and hence give us a chance to catch up
+                if (
+                    self.clock.time_msec() - loop_start_time > 60 * 1000
+                    and not self._transaction_manager.deprioritise_transmission
+                ):
+                    logger.warning(
+                        "Event processing loop is getting behind: deprioritising "
+                        "transaction transmission"
+                    )
+                    self._transaction_manager.deprioritise_transmission = True
+
                 last_token = yield self.store.get_federation_out_pos("events")
                 next_token, events = yield self.store.get_all_new_events_stream(
                     last_token, self._last_poked_id, limit=100
@@ -251,6 +266,9 @@ def handle_room_events(events):
 
         finally:
             self._is_processing = False
+            if self._transaction_manager.deprioritise_transmission:
+                logger.info("Event queue caught up: re-prioritising transmission")
+                self._transaction_manager.deprioritise_transmission = False
 
     def _send_pdu(self, pdu, destinations):
         # We loop through all destinations to see whether we already have
diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py
index fad980b8930..b890aaf8409 100644
--- a/synapse/federation/sender/per_destination_queue.py
+++ b/synapse/federation/sender/per_destination_queue.py
@@ -189,6 +189,11 @@ def _transaction_transmission_loop(self):
 
             pending_pdus = []
             while True:
+                if self._transaction_manager.deprioritise_transmission:
+                    # if the event-processing loop has got behind, sleep to give it
+                    # a chance to catch up
+                    yield self._clock.sleep(2)
+
                 # We have to keep 2 free slots for presence and rr_edus
                 limit = MAX_EDUS_PER_TRANSACTION - 2
 
diff --git a/synapse/federation/sender/transaction_manager.py b/synapse/federation/sender/transaction_manager.py
index 5b6c79c51af..69679dbf65c 100644
--- a/synapse/federation/sender/transaction_manager.py
+++ b/synapse/federation/sender/transaction_manager.py
@@ -49,6 +49,10 @@ def __init__(self, hs):
         # HACK to get unique tx id
         self._next_txn_id = int(self.clock.time_msec())
 
+        # the federation sender sometimes sets this to delay transaction transmission,
+        # if the sender gets behind.
+        self.deprioritise_transmission = False
+
     @measure_func("_send_new_transaction")
     @defer.inlineCallbacks
     def send_new_transaction(self, destination, pending_pdus, pending_edus):

From cb217d5d60c9e006357b0d37247d5acb3daae84a Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Thu, 3 Oct 2019 17:05:24 +0100
Subject: [PATCH 085/278] Revert "Awful hackery to try to get the fed sender to
 keep up"

This reverts commit 721086a291b7d187725ab3314dc1a4f11bd00f46.

This didn't help.
---
 changelog.d/6126.feature                       |  1 -
 synapse/federation/sender/__init__.py          | 18 ------------------
 .../federation/sender/per_destination_queue.py |  5 -----
 .../federation/sender/transaction_manager.py   |  4 ----
 4 files changed, 28 deletions(-)
 delete mode 100644 changelog.d/6126.feature

diff --git a/changelog.d/6126.feature b/changelog.d/6126.feature
deleted file mode 100644
index 1207ba6206a..00000000000
--- a/changelog.d/6126.feature
+++ /dev/null
@@ -1 +0,0 @@
-Group events into larger federation transactions at times of high traffic.
diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py
index 497485fac22..d46f4aaeb1a 100644
--- a/synapse/federation/sender/__init__.py
+++ b/synapse/federation/sender/__init__.py
@@ -152,24 +152,9 @@ def notify_new_events(self, current_id):
 
     @defer.inlineCallbacks
     def _process_event_queue_loop(self):
-        loop_start_time = self.clock.time_msec()
         try:
             self._is_processing = True
             while True:
-                # if we've been going around this loop for a long time without
-                # catching up, deprioritise transaction transmission. This should mean
-                # that events get batched into fewer transactions, which is more
-                # efficient, and hence give us a chance to catch up
-                if (
-                    self.clock.time_msec() - loop_start_time > 60 * 1000
-                    and not self._transaction_manager.deprioritise_transmission
-                ):
-                    logger.warning(
-                        "Event processing loop is getting behind: deprioritising "
-                        "transaction transmission"
-                    )
-                    self._transaction_manager.deprioritise_transmission = True
-
                 last_token = yield self.store.get_federation_out_pos("events")
                 next_token, events = yield self.store.get_all_new_events_stream(
                     last_token, self._last_poked_id, limit=100
@@ -266,9 +251,6 @@ def handle_room_events(events):
 
         finally:
             self._is_processing = False
-            if self._transaction_manager.deprioritise_transmission:
-                logger.info("Event queue caught up: re-prioritising transmission")
-                self._transaction_manager.deprioritise_transmission = False
 
     def _send_pdu(self, pdu, destinations):
         # We loop through all destinations to see whether we already have
diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py
index b890aaf8409..fad980b8930 100644
--- a/synapse/federation/sender/per_destination_queue.py
+++ b/synapse/federation/sender/per_destination_queue.py
@@ -189,11 +189,6 @@ def _transaction_transmission_loop(self):
 
             pending_pdus = []
             while True:
-                if self._transaction_manager.deprioritise_transmission:
-                    # if the event-processing loop has got behind, sleep to give it
-                    # a chance to catch up
-                    yield self._clock.sleep(2)
-
                 # We have to keep 2 free slots for presence and rr_edus
                 limit = MAX_EDUS_PER_TRANSACTION - 2
 
diff --git a/synapse/federation/sender/transaction_manager.py b/synapse/federation/sender/transaction_manager.py
index 69679dbf65c..5b6c79c51af 100644
--- a/synapse/federation/sender/transaction_manager.py
+++ b/synapse/federation/sender/transaction_manager.py
@@ -49,10 +49,6 @@ def __init__(self, hs):
         # HACK to get unique tx id
         self._next_txn_id = int(self.clock.time_msec())
 
-        # the federation sender sometimes sets this to delay transaction transmission,
-        # if the sender gets behind.
-        self.deprioritise_transmission = False
-
     @measure_func("_send_new_transaction")
     @defer.inlineCallbacks
     def send_new_transaction(self, destination, pending_pdus, pending_edus):

From b852a8247d1132fae125c3fb813023b6ec3f6cb3 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Fri, 27 Sep 2019 16:00:00 +0100
Subject: [PATCH 086/278] Awful hackery to try to get the fed sender to keep up

Basically, if the federation sender starts getting behind, insert some sleeps
into the transaction transmission code to give the fed sender a chance to catch
up.

Might have to experiment a bit with the numbers.
---
 changelog.d/6126.feature                       |  1 +
 synapse/federation/sender/__init__.py          | 18 ++++++++++++++++++
 .../federation/sender/per_destination_queue.py |  5 +++++
 .../federation/sender/transaction_manager.py   |  4 ++++
 4 files changed, 28 insertions(+)
 create mode 100644 changelog.d/6126.feature

diff --git a/changelog.d/6126.feature b/changelog.d/6126.feature
new file mode 100644
index 00000000000..1207ba6206a
--- /dev/null
+++ b/changelog.d/6126.feature
@@ -0,0 +1 @@
+Group events into larger federation transactions at times of high traffic.
diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py
index 2b2ee8612ad..f23bbf0e1f7 100644
--- a/synapse/federation/sender/__init__.py
+++ b/synapse/federation/sender/__init__.py
@@ -152,9 +152,24 @@ def notify_new_events(self, current_id):
 
     @defer.inlineCallbacks
     def _process_event_queue_loop(self):
+        loop_start_time = self.clock.time_msec()
         try:
             self._is_processing = True
             while True:
+                # if we've been going around this loop for a long time without
+                # catching up, deprioritise transaction transmission. This should mean
+                # that events get batched into fewer transactions, which is more
+                # efficient, and hence give us a chance to catch up
+                if (
+                    self.clock.time_msec() - loop_start_time > 60 * 1000
+                    and not self._transaction_manager.deprioritise_transmission
+                ):
+                    logger.warning(
+                        "Event processing loop is getting behind: deprioritising "
+                        "transaction transmission"
+                    )
+                    self._transaction_manager.deprioritise_transmission = True
+
                 last_token = yield self.store.get_federation_out_pos("events")
                 next_token, events = yield self.store.get_all_new_events_stream(
                     last_token, self._last_poked_id, limit=100
@@ -252,6 +267,9 @@ def handle_room_events(events):
 
         finally:
             self._is_processing = False
+            if self._transaction_manager.deprioritise_transmission:
+                logger.info("Event queue caught up: re-prioritising transmission")
+                self._transaction_manager.deprioritise_transmission = False
 
     def _send_pdu(self, pdu, destinations):
         # We loop through all destinations to see whether we already have
diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py
index fad980b8930..b890aaf8409 100644
--- a/synapse/federation/sender/per_destination_queue.py
+++ b/synapse/federation/sender/per_destination_queue.py
@@ -189,6 +189,11 @@ def _transaction_transmission_loop(self):
 
             pending_pdus = []
             while True:
+                if self._transaction_manager.deprioritise_transmission:
+                    # if the event-processing loop has got behind, sleep to give it
+                    # a chance to catch up
+                    yield self._clock.sleep(2)
+
                 # We have to keep 2 free slots for presence and rr_edus
                 limit = MAX_EDUS_PER_TRANSACTION - 2
 
diff --git a/synapse/federation/sender/transaction_manager.py b/synapse/federation/sender/transaction_manager.py
index 5b6c79c51af..69679dbf65c 100644
--- a/synapse/federation/sender/transaction_manager.py
+++ b/synapse/federation/sender/transaction_manager.py
@@ -49,6 +49,10 @@ def __init__(self, hs):
         # HACK to get unique tx id
         self._next_txn_id = int(self.clock.time_msec())
 
+        # the federation sender sometimes sets this to delay transaction transmission,
+        # if the sender gets behind.
+        self.deprioritise_transmission = False
+
     @measure_func("_send_new_transaction")
     @defer.inlineCallbacks
     def send_new_transaction(self, destination, pending_pdus, pending_edus):

From 15b2a50817348c8e25a8e5f420b3dfd20451c6d6 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Fri, 11 Oct 2019 09:15:56 +0100
Subject: [PATCH 087/278] Add some randomness to the high-cpu backoff hack
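
Instead of every per-destination loop sleeping a fixed 2 seconds, each one now sleeps for a random duration in [T, 2T] so they do not all wake in lockstep. A sketch of just the jitter:

```python
import random

DEPRIORITISE_SLEEP_TIME = 10  # seconds, as in the patch

def backoff_sleep_time():
    # Uniform jitter between T and 2T spreads the wake-ups out.
    return random.uniform(DEPRIORITISE_SLEEP_TIME, DEPRIORITISE_SLEEP_TIME * 2)
```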

---
 synapse/federation/sender/__init__.py              |  4 ++--
 synapse/federation/sender/per_destination_queue.py | 12 ++++++++++--
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py
index f23bbf0e1f7..788b26446dd 100644
--- a/synapse/federation/sender/__init__.py
+++ b/synapse/federation/sender/__init__.py
@@ -165,8 +165,8 @@ def _process_event_queue_loop(self):
                     and not self._transaction_manager.deprioritise_transmission
                 ):
                     logger.warning(
-                        "Event processing loop is getting behind: deprioritising "
-                        "transaction transmission"
+                        "Event queue is getting behind: deprioritising transaction "
+                        "transmission"
                     )
                     self._transaction_manager.deprioritise_transmission = True
 
diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py
index b890aaf8409..69a6f47b7bd 100644
--- a/synapse/federation/sender/per_destination_queue.py
+++ b/synapse/federation/sender/per_destination_queue.py
@@ -15,6 +15,7 @@
 # limitations under the License.
 import datetime
 import logging
+import random
 
 from prometheus_client import Counter
 
@@ -36,6 +37,8 @@
 # This is defined in the Matrix spec and enforced by the receiver.
 MAX_EDUS_PER_TRANSACTION = 100
 
+DEPRIORITISE_SLEEP_TIME = 10
+
 logger = logging.getLogger(__name__)
 
 
@@ -191,8 +194,13 @@ def _transaction_transmission_loop(self):
             while True:
                 if self._transaction_manager.deprioritise_transmission:
                     # if the event-processing loop has got behind, sleep to give it
-                    # a chance to catch up
-                    yield self._clock.sleep(2)
+                    # a chance to catch up. Add some randomness so that the transmitters
+                    # don't all wake up in sync.
+                    sleeptime = random.uniform(
+                        DEPRIORITISE_SLEEP_TIME, DEPRIORITISE_SLEEP_TIME * 2
+                    )
+                    logger.info("TX [%s]: sleeping for %f seconds", sleeptime)
+                    yield self._clock.sleep(sleeptime)
 
                 # We have to keep 2 free slots for presence and rr_edus
                 limit = MAX_EDUS_PER_TRANSACTION - 2

From 28889d8da5b1ca8c7106e8ace246220e96af9f59 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Fri, 11 Oct 2019 09:57:18 +0100
Subject: [PATCH 088/278] fix logging

---
 synapse/federation/sender/per_destination_queue.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py
index 69a6f47b7bd..818c102799f 100644
--- a/synapse/federation/sender/per_destination_queue.py
+++ b/synapse/federation/sender/per_destination_queue.py
@@ -199,7 +199,9 @@ def _transaction_transmission_loop(self):
                     sleeptime = random.uniform(
                         DEPRIORITISE_SLEEP_TIME, DEPRIORITISE_SLEEP_TIME * 2
                     )
-                    logger.info("TX [%s]: sleeping for %f seconds", sleeptime)
+                    logger.info(
+                        "TX [%s]: sleeping for %f seconds", self._destination, sleeptime
+                    )
                     yield self._clock.sleep(sleeptime)
 
                 # We have to keep 2 free slots for presence and rr_edus

From 5407e69732b42e47d084f3ac179eedbecd1f78be Mon Sep 17 00:00:00 2001
From: Andrew Morgan <andrew@amorgan.xyz>
Date: Tue, 26 Nov 2019 12:04:19 +0000
Subject: [PATCH 089/278] Change /push/v1/notify IP to 10.103.0.7

---
 synapse/push/httppusher.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py
index 338267c82dc..026575278c4 100644
--- a/synapse/push/httppusher.py
+++ b/synapse/push/httppusher.py
@@ -105,7 +105,7 @@ def __init__(self, hs, pusherdict):
         self.url = self.data["url"]
         self.url = self.url.replace(
             "https://matrix.org/_matrix/push/v1/notify",
-            "http://10.101.0.14/_matrix/push/v1/notify",
+            "http://10.103.0.7/_matrix/push/v1/notify",
         )
         self.http_client = hs.get_proxied_http_client()
         self.data_minus_url = {}

From 508e0f9310abedb4d10602e7bf7e57512900d80c Mon Sep 17 00:00:00 2001
From: Andrew Morgan <andrew@amorgan.xyz>
Date: Tue, 26 Nov 2019 12:15:46 +0000
Subject: [PATCH 090/278] 1.6.0

---
 CHANGES.md          | 6 ++++++
 debian/changelog    | 6 ++++++
 synapse/__init__.py | 2 +-
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/CHANGES.md b/CHANGES.md
index d26bc7a86f2..f25627442e1 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,3 +1,9 @@
+Synapse 1.6.0 (2019-11-26)
+==========================
+
+No significant changes.
+
+
 Synapse 1.6.0rc2 (2019-11-25)
 =============================
 
diff --git a/debian/changelog b/debian/changelog
index c4415f460a8..82dae017f10 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+matrix-synapse-py3 (1.6.0) stable; urgency=medium
+
+  * New synapse release 1.6.0.
+
+ -- Synapse Packaging team <packages@matrix.org>  Tue, 26 Nov 2019 12:15:40 +0000
+
 matrix-synapse-py3 (1.5.1) stable; urgency=medium
 
   * New synapse release 1.5.1.
diff --git a/synapse/__init__.py b/synapse/__init__.py
index 051c83774e2..53eedc0048f 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -36,7 +36,7 @@
 except ImportError:
     pass
 
-__version__ = "1.6.0rc2"
+__version__ = "1.6.0"
 
 if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)):
     # We import here so that we don't have to install a bunch of deps when

From aebe20c4524da46241d1d8a0e87a9fa6e7b5056a Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 26 Nov 2019 13:10:09 +0000
Subject: [PATCH 091/278] Fix phone home stats (#6418)

Fix phone home stats
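
The bug was a misplaced parenthesis: the rusage snapshot was being passed as the second argument to `int()` instead of forming the second element of a tuple. A minimal illustration:

```python
import resource
import time

def broken(now, usage):
    return (int(now, usage))    # int() receives two arguments -> TypeError at runtime

def fixed(now, usage):
    return (int(now), usage)    # the intended (timestamp, rusage) pair

fixed(time.time(), resource.getrusage(resource.RUSAGE_SELF))
```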
---
 changelog.d/6418.bugfix   | 1 +
 synapse/app/homeserver.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/6418.bugfix

diff --git a/changelog.d/6418.bugfix b/changelog.d/6418.bugfix
new file mode 100644
index 00000000000..a1f488d3a22
--- /dev/null
+++ b/changelog.d/6418.bugfix
@@ -0,0 +1 @@
+Fix phone home stats reporting.
diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index 73e2c29d067..883b3fb70b8 100644
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -585,7 +585,7 @@ def profiled(*args, **kargs):
     def performance_stats_init():
         _stats_process.clear()
         _stats_process.append(
-            (int(hs.get_clock().time(), resource.getrusage(resource.RUSAGE_SELF)))
+            (int(hs.get_clock().time()), resource.getrusage(resource.RUSAGE_SELF))
         )
 
     def start_phone_stats_home():

From 4d02bfd6e1e6388a0c67c618570fc35c814d8017 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Tue, 21 Jan 2020 23:02:58 +0000
Subject: [PATCH 092/278] a bit of debugging for media storage providers
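
The new `__str__` methods make the `%s` placeholders in the debug log render a readable provider name instead of a default object repr. A small sketch:

```python
import logging

logger = logging.getLogger(__name__)

class FileStorageProviderBackend:
    def __init__(self, base_directory):
        self.base_directory = base_directory

    def __str__(self):
        return "FileStorageProviderBackend[%s]" % (self.base_directory,)

provider = FileStorageProviderBackend("/data/media_store")
logger.debug("Streaming %s from %s", "/remote_content/abc", provider)
# logs: Streaming /remote_content/abc from FileStorageProviderBackend[/data/media_store]
```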

---
 synapse/rest/media/v1/media_storage.py    | 1 +
 synapse/rest/media/v1/storage_provider.py | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py
index 3b87717a5aa..683a79c9664 100644
--- a/synapse/rest/media/v1/media_storage.py
+++ b/synapse/rest/media/v1/media_storage.py
@@ -148,6 +148,7 @@ def fetch_media(self, file_info):
         for provider in self.storage_providers:
             res = yield provider.fetch(path, file_info)
             if res:
+                logger.debug("Streaming %s from %s", path, provider)
                 return res
 
         return None
diff --git a/synapse/rest/media/v1/storage_provider.py b/synapse/rest/media/v1/storage_provider.py
index 37687ea7f4e..858680be266 100644
--- a/synapse/rest/media/v1/storage_provider.py
+++ b/synapse/rest/media/v1/storage_provider.py
@@ -77,6 +77,9 @@ def __init__(self, backend, store_local, store_synchronous, store_remote):
         self.store_synchronous = store_synchronous
         self.store_remote = store_remote
 
+    def __str__(self):
+        return "StorageProviderWrapper[%s]" % (self.backend,)
+
     def store_file(self, path, file_info):
         if not file_info.server_name and not self.store_local:
             return defer.succeed(None)
@@ -114,6 +117,9 @@ def __init__(self, hs, config):
         self.cache_directory = hs.config.media_store_path
         self.base_directory = config
 
+    def __str__(self):
+        return "FileStorageProviderBackend[%s]" % (self.base_directory,)
+
     def store_file(self, path, file_info):
         """See StorageProvider.store_file"""
 

From 77a166577a9833bb0df0d2e0bd6631a18a15a8e5 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 7 Feb 2020 11:14:19 +0000
Subject: [PATCH 093/278] Allow moving group read APIs to workers (#6866)
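
A simplified sketch of the handler split this makes (class names are from the patch, bodies elided): the read-only group APIs move onto a worker-safe base class, while write paths and the attestation renewer stay on the handler that only the master instantiates.

```python
class GroupsServerWorkerHandler(object):
    def __init__(self, hs):
        self.hs = hs
        self.store = hs.get_datastore()

    def get_group_profile(self, group_id, requester_user_id):
        ...  # read-only: can be served from client_reader / federation_reader workers

class GroupsServerHandler(GroupsServerWorkerHandler):
    def __init__(self, hs):
        super(GroupsServerHandler, self).__init__(hs)
        # Only the master needs to renew attestations.
        hs.get_groups_attestation_renewer()

    def update_group_profile(self, group_id, requester_user_id, content):
        ...  # write path: stays master-only
```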

---
 changelog.d/6866.feature                      |   1 +
 docs/workers.md                               |   8 +
 synapse/app/client_reader.py                  |   3 +
 synapse/app/federation_reader.py              |   2 +
 synapse/groups/groups_server.py               | 377 ++++----
 synapse/handlers/groups_local.py              | 270 +++---
 synapse/replication/slave/storage/groups.py   |  14 +-
 synapse/server.py                             |  14 +-
 .../storage/data_stores/main/group_server.py  | 880 +++++++++---------
 9 files changed, 802 insertions(+), 767 deletions(-)
 create mode 100644 changelog.d/6866.feature

diff --git a/changelog.d/6866.feature b/changelog.d/6866.feature
new file mode 100644
index 00000000000..256feab6ff4
--- /dev/null
+++ b/changelog.d/6866.feature
@@ -0,0 +1 @@
+Add ability to run some group APIs on workers.
diff --git a/docs/workers.md b/docs/workers.md
index 09a9d8a7b85..82442d6a0a0 100644
--- a/docs/workers.md
+++ b/docs/workers.md
@@ -177,8 +177,13 @@ endpoints matching the following regular expressions:
     ^/_matrix/federation/v1/event_auth/
     ^/_matrix/federation/v1/exchange_third_party_invite/
     ^/_matrix/federation/v1/send/
+    ^/_matrix/federation/v1/get_groups_publicised$
     ^/_matrix/key/v2/query
 
+Additionally, the following REST endpoints can be handled for GET requests:
+
+    ^/_matrix/federation/v1/groups/
+
 The above endpoints should all be routed to the federation_reader worker by the
 reverse-proxy configuration.
 
@@ -254,10 +259,13 @@ following regular expressions:
     ^/_matrix/client/(api/v1|r0|unstable)/keys/changes$
     ^/_matrix/client/versions$
     ^/_matrix/client/(api/v1|r0|unstable)/voip/turnServer$
+    ^/_matrix/client/(api/v1|r0|unstable)/joined_groups$
+    ^/_matrix/client/(api/v1|r0|unstable)/get_groups_publicised$
 
 Additionally, the following REST endpoints can be handled for GET requests:
 
     ^/_matrix/client/(api/v1|r0|unstable)/pushrules/.*$
+    ^/_matrix/client/(api/v1|r0|unstable)/groups/.*$
 
 Additionally, the following REST endpoints can be handled, but all requests must
 be routed to the same instance:
diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py
index ca96da6a4a8..7fa91a3b114 100644
--- a/synapse/app/client_reader.py
+++ b/synapse/app/client_reader.py
@@ -57,6 +57,7 @@
     RoomStateRestServlet,
 )
 from synapse.rest.client.v1.voip import VoipRestServlet
+from synapse.rest.client.v2_alpha import groups
 from synapse.rest.client.v2_alpha.account import ThreepidRestServlet
 from synapse.rest.client.v2_alpha.keys import KeyChangesServlet, KeyQueryServlet
 from synapse.rest.client.v2_alpha.register import RegisterRestServlet
@@ -124,6 +125,8 @@ def _listen_http(self, listener_config):
                     PushRuleRestServlet(self).register(resource)
                     VersionsRestServlet(self).register(resource)
 
+                    groups.register_servlets(self, resource)
+
                     resources.update({"/_matrix/client": resource})
 
         root_resource = create_resource_tree(resources, NoResource())
diff --git a/synapse/app/federation_reader.py b/synapse/app/federation_reader.py
index 1f1cea14160..5e17ef1396c 100644
--- a/synapse/app/federation_reader.py
+++ b/synapse/app/federation_reader.py
@@ -35,6 +35,7 @@
 from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore
 from synapse.replication.slave.storage.directory import DirectoryStore
 from synapse.replication.slave.storage.events import SlavedEventStore
+from synapse.replication.slave.storage.groups import SlavedGroupServerStore
 from synapse.replication.slave.storage.keys import SlavedKeyStore
 from synapse.replication.slave.storage.profile import SlavedProfileStore
 from synapse.replication.slave.storage.push_rule import SlavedPushRuleStore
@@ -66,6 +67,7 @@ class FederationReaderSlavedStore(
     SlavedEventStore,
     SlavedKeyStore,
     SlavedRegistrationStore,
+    SlavedGroupServerStore,
     RoomStore,
     DirectoryStore,
     SlavedTransactionStore,
diff --git a/synapse/groups/groups_server.py b/synapse/groups/groups_server.py
index 0ec9be3cb51..c106abae213 100644
--- a/synapse/groups/groups_server.py
+++ b/synapse/groups/groups_server.py
@@ -36,7 +36,7 @@
 # TODO: Flairs
 
 
-class GroupsServerHandler(object):
+class GroupsServerWorkerHandler(object):
     def __init__(self, hs):
         self.hs = hs
         self.store = hs.get_datastore()
@@ -51,9 +51,6 @@ def __init__(self, hs):
         self.transport_client = hs.get_federation_transport_client()
         self.profile_handler = hs.get_profile_handler()
 
-        # Ensure attestations get renewed
-        hs.get_groups_attestation_renewer()
-
     @defer.inlineCallbacks
     def check_group_is_ours(
         self, group_id, requester_user_id, and_exists=False, and_is_admin=None
@@ -167,6 +164,197 @@ def get_group_summary(self, group_id, requester_user_id):
             "user": membership_info,
         }
 
+    @defer.inlineCallbacks
+    def get_group_categories(self, group_id, requester_user_id):
+        """Get all categories in a group (as seen by user)
+        """
+        yield self.check_group_is_ours(group_id, requester_user_id, and_exists=True)
+
+        categories = yield self.store.get_group_categories(group_id=group_id)
+        return {"categories": categories}
+
+    @defer.inlineCallbacks
+    def get_group_category(self, group_id, requester_user_id, category_id):
+        """Get a specific category in a group (as seen by user)
+        """
+        yield self.check_group_is_ours(group_id, requester_user_id, and_exists=True)
+
+        res = yield self.store.get_group_category(
+            group_id=group_id, category_id=category_id
+        )
+
+        logger.info("group %s", res)
+
+        return res
+
+    @defer.inlineCallbacks
+    def get_group_roles(self, group_id, requester_user_id):
+        """Get all roles in a group (as seen by user)
+        """
+        yield self.check_group_is_ours(group_id, requester_user_id, and_exists=True)
+
+        roles = yield self.store.get_group_roles(group_id=group_id)
+        return {"roles": roles}
+
+    @defer.inlineCallbacks
+    def get_group_role(self, group_id, requester_user_id, role_id):
+        """Get a specific role in a group (as seen by user)
+        """
+        yield self.check_group_is_ours(group_id, requester_user_id, and_exists=True)
+
+        res = yield self.store.get_group_role(group_id=group_id, role_id=role_id)
+        return res
+
+    @defer.inlineCallbacks
+    def get_group_profile(self, group_id, requester_user_id):
+        """Get the group profile as seen by requester_user_id
+        """
+
+        yield self.check_group_is_ours(group_id, requester_user_id)
+
+        group = yield self.store.get_group(group_id)
+
+        if group:
+            cols = [
+                "name",
+                "short_description",
+                "long_description",
+                "avatar_url",
+                "is_public",
+            ]
+            group_description = {key: group[key] for key in cols}
+            group_description["is_openly_joinable"] = group["join_policy"] == "open"
+
+            return group_description
+        else:
+            raise SynapseError(404, "Unknown group")
+
+    @defer.inlineCallbacks
+    def get_users_in_group(self, group_id, requester_user_id):
+        """Get the users in group as seen by requester_user_id.
+
+        The ordering is arbitrary at the moment
+        """
+
+        yield self.check_group_is_ours(group_id, requester_user_id, and_exists=True)
+
+        is_user_in_group = yield self.store.is_user_in_group(
+            requester_user_id, group_id
+        )
+
+        user_results = yield self.store.get_users_in_group(
+            group_id, include_private=is_user_in_group
+        )
+
+        chunk = []
+        for user_result in user_results:
+            g_user_id = user_result["user_id"]
+            is_public = user_result["is_public"]
+            is_privileged = user_result["is_admin"]
+
+            entry = {"user_id": g_user_id}
+
+            profile = yield self.profile_handler.get_profile_from_cache(g_user_id)
+            entry.update(profile)
+
+            entry["is_public"] = bool(is_public)
+            entry["is_privileged"] = bool(is_privileged)
+
+            if not self.is_mine_id(g_user_id):
+                attestation = yield self.store.get_remote_attestation(
+                    group_id, g_user_id
+                )
+                if not attestation:
+                    continue
+
+                entry["attestation"] = attestation
+            else:
+                entry["attestation"] = self.attestations.create_attestation(
+                    group_id, g_user_id
+                )
+
+            chunk.append(entry)
+
+        # TODO: If admin add lists of users whose attestations have timed out
+
+        return {"chunk": chunk, "total_user_count_estimate": len(user_results)}
+
+    @defer.inlineCallbacks
+    def get_invited_users_in_group(self, group_id, requester_user_id):
+        """Get the users that have been invited to a group as seen by requester_user_id.
+
+        The ordering is arbitrary at the moment
+        """
+
+        yield self.check_group_is_ours(group_id, requester_user_id, and_exists=True)
+
+        is_user_in_group = yield self.store.is_user_in_group(
+            requester_user_id, group_id
+        )
+
+        if not is_user_in_group:
+            raise SynapseError(403, "User not in group")
+
+        invited_users = yield self.store.get_invited_users_in_group(group_id)
+
+        user_profiles = []
+
+        for user_id in invited_users:
+            user_profile = {"user_id": user_id}
+            try:
+                profile = yield self.profile_handler.get_profile_from_cache(user_id)
+                user_profile.update(profile)
+            except Exception as e:
+                logger.warning("Error getting profile for %s: %s", user_id, e)
+            user_profiles.append(user_profile)
+
+        return {"chunk": user_profiles, "total_user_count_estimate": len(invited_users)}
+
+    @defer.inlineCallbacks
+    def get_rooms_in_group(self, group_id, requester_user_id):
+        """Get the rooms in group as seen by requester_user_id
+
+        This returns rooms in order of decreasing number of joined users
+        """
+
+        yield self.check_group_is_ours(group_id, requester_user_id, and_exists=True)
+
+        is_user_in_group = yield self.store.is_user_in_group(
+            requester_user_id, group_id
+        )
+
+        room_results = yield self.store.get_rooms_in_group(
+            group_id, include_private=is_user_in_group
+        )
+
+        chunk = []
+        for room_result in room_results:
+            room_id = room_result["room_id"]
+
+            joined_users = yield self.store.get_users_in_room(room_id)
+            entry = yield self.room_list_handler.generate_room_entry(
+                room_id, len(joined_users), with_alias=False, allow_private=True
+            )
+
+            if not entry:
+                continue
+
+            entry["is_public"] = bool(room_result["is_public"])
+
+            chunk.append(entry)
+
+        chunk.sort(key=lambda e: -e["num_joined_members"])
+
+        return {"chunk": chunk, "total_room_count_estimate": len(room_results)}
+
+
+class GroupsServerHandler(GroupsServerWorkerHandler):
+    def __init__(self, hs):
+        super(GroupsServerHandler, self).__init__(hs)
+
+        # Ensure attestations get renewed
+        hs.get_groups_attestation_renewer()
+
     @defer.inlineCallbacks
     def update_group_summary_room(
         self, group_id, requester_user_id, room_id, category_id, content
@@ -229,27 +417,6 @@ def set_group_join_policy(self, group_id, requester_user_id, content):
 
         return {}
 
-    @defer.inlineCallbacks
-    def get_group_categories(self, group_id, requester_user_id):
-        """Get all categories in a group (as seen by user)
-        """
-        yield self.check_group_is_ours(group_id, requester_user_id, and_exists=True)
-
-        categories = yield self.store.get_group_categories(group_id=group_id)
-        return {"categories": categories}
-
-    @defer.inlineCallbacks
-    def get_group_category(self, group_id, requester_user_id, category_id):
-        """Get a specific category in a group (as seen by user)
-        """
-        yield self.check_group_is_ours(group_id, requester_user_id, and_exists=True)
-
-        res = yield self.store.get_group_category(
-            group_id=group_id, category_id=category_id
-        )
-
-        return res
-
     @defer.inlineCallbacks
     def update_group_category(self, group_id, requester_user_id, category_id, content):
         """Add/Update a group category
@@ -284,24 +451,6 @@ def delete_group_category(self, group_id, requester_user_id, category_id):
 
         return {}
 
-    @defer.inlineCallbacks
-    def get_group_roles(self, group_id, requester_user_id):
-        """Get all roles in a group (as seen by user)
-        """
-        yield self.check_group_is_ours(group_id, requester_user_id, and_exists=True)
-
-        roles = yield self.store.get_group_roles(group_id=group_id)
-        return {"roles": roles}
-
-    @defer.inlineCallbacks
-    def get_group_role(self, group_id, requester_user_id, role_id):
-        """Get a specific role in a group (as seen by user)
-        """
-        yield self.check_group_is_ours(group_id, requester_user_id, and_exists=True)
-
-        res = yield self.store.get_group_role(group_id=group_id, role_id=role_id)
-        return res
-
     @defer.inlineCallbacks
     def update_group_role(self, group_id, requester_user_id, role_id, content):
         """Add/update a role in a group
@@ -370,30 +519,6 @@ def delete_group_summary_user(self, group_id, requester_user_id, user_id, role_i
 
         return {}
 
-    @defer.inlineCallbacks
-    def get_group_profile(self, group_id, requester_user_id):
-        """Get the group profile as seen by requester_user_id
-        """
-
-        yield self.check_group_is_ours(group_id, requester_user_id)
-
-        group = yield self.store.get_group(group_id)
-
-        if group:
-            cols = [
-                "name",
-                "short_description",
-                "long_description",
-                "avatar_url",
-                "is_public",
-            ]
-            group_description = {key: group[key] for key in cols}
-            group_description["is_openly_joinable"] = group["join_policy"] == "open"
-
-            return group_description
-        else:
-            raise SynapseError(404, "Unknown group")
-
     @defer.inlineCallbacks
     def update_group_profile(self, group_id, requester_user_id, content):
         """Update the group profile
@@ -412,124 +537,6 @@ def update_group_profile(self, group_id, requester_user_id, content):
 
         yield self.store.update_group_profile(group_id, profile)
 
-    @defer.inlineCallbacks
-    def get_users_in_group(self, group_id, requester_user_id):
-        """Get the users in group as seen by requester_user_id.
-
-        The ordering is arbitrary at the moment
-        """
-
-        yield self.check_group_is_ours(group_id, requester_user_id, and_exists=True)
-
-        is_user_in_group = yield self.store.is_user_in_group(
-            requester_user_id, group_id
-        )
-
-        user_results = yield self.store.get_users_in_group(
-            group_id, include_private=is_user_in_group
-        )
-
-        chunk = []
-        for user_result in user_results:
-            g_user_id = user_result["user_id"]
-            is_public = user_result["is_public"]
-            is_privileged = user_result["is_admin"]
-
-            entry = {"user_id": g_user_id}
-
-            profile = yield self.profile_handler.get_profile_from_cache(g_user_id)
-            entry.update(profile)
-
-            entry["is_public"] = bool(is_public)
-            entry["is_privileged"] = bool(is_privileged)
-
-            if not self.is_mine_id(g_user_id):
-                attestation = yield self.store.get_remote_attestation(
-                    group_id, g_user_id
-                )
-                if not attestation:
-                    continue
-
-                entry["attestation"] = attestation
-            else:
-                entry["attestation"] = self.attestations.create_attestation(
-                    group_id, g_user_id
-                )
-
-            chunk.append(entry)
-
-        # TODO: If admin add lists of users whose attestations have timed out
-
-        return {"chunk": chunk, "total_user_count_estimate": len(user_results)}
-
-    @defer.inlineCallbacks
-    def get_invited_users_in_group(self, group_id, requester_user_id):
-        """Get the users that have been invited to a group as seen by requester_user_id.
-
-        The ordering is arbitrary at the moment
-        """
-
-        yield self.check_group_is_ours(group_id, requester_user_id, and_exists=True)
-
-        is_user_in_group = yield self.store.is_user_in_group(
-            requester_user_id, group_id
-        )
-
-        if not is_user_in_group:
-            raise SynapseError(403, "User not in group")
-
-        invited_users = yield self.store.get_invited_users_in_group(group_id)
-
-        user_profiles = []
-
-        for user_id in invited_users:
-            user_profile = {"user_id": user_id}
-            try:
-                profile = yield self.profile_handler.get_profile_from_cache(user_id)
-                user_profile.update(profile)
-            except Exception as e:
-                logger.warning("Error getting profile for %s: %s", user_id, e)
-            user_profiles.append(user_profile)
-
-        return {"chunk": user_profiles, "total_user_count_estimate": len(invited_users)}
-
-    @defer.inlineCallbacks
-    def get_rooms_in_group(self, group_id, requester_user_id):
-        """Get the rooms in group as seen by requester_user_id
-
-        This returns rooms in order of decreasing number of joined users
-        """
-
-        yield self.check_group_is_ours(group_id, requester_user_id, and_exists=True)
-
-        is_user_in_group = yield self.store.is_user_in_group(
-            requester_user_id, group_id
-        )
-
-        room_results = yield self.store.get_rooms_in_group(
-            group_id, include_private=is_user_in_group
-        )
-
-        chunk = []
-        for room_result in room_results:
-            room_id = room_result["room_id"]
-
-            joined_users = yield self.store.get_users_in_room(room_id)
-            entry = yield self.room_list_handler.generate_room_entry(
-                room_id, len(joined_users), with_alias=False, allow_private=True
-            )
-
-            if not entry:
-                continue
-
-            entry["is_public"] = bool(room_result["is_public"])
-
-            chunk.append(entry)
-
-        chunk.sort(key=lambda e: -e["num_joined_members"])
-
-        return {"chunk": chunk, "total_room_count_estimate": len(room_results)}
-
     @defer.inlineCallbacks
     def add_room_to_group(self, group_id, requester_user_id, room_id, content):
         """Add room to group
diff --git a/synapse/handlers/groups_local.py b/synapse/handlers/groups_local.py
index 319565510f9..ad22415782c 100644
--- a/synapse/handlers/groups_local.py
+++ b/synapse/handlers/groups_local.py
@@ -63,7 +63,7 @@ def request_failed_errback(failure):
     return f
 
 
-class GroupsLocalHandler(object):
+class GroupsLocalWorkerHandler(object):
     def __init__(self, hs):
         self.hs = hs
         self.store = hs.get_datastore()
@@ -81,40 +81,17 @@ def __init__(self, hs):
 
         self.profile_handler = hs.get_profile_handler()
 
-        # Ensure attestations get renewed
-        hs.get_groups_attestation_renewer()
-
     # The following functions merely route the query to the local groups server
     # or federation depending on if the group is local or remote
 
     get_group_profile = _create_rerouter("get_group_profile")
-    update_group_profile = _create_rerouter("update_group_profile")
     get_rooms_in_group = _create_rerouter("get_rooms_in_group")
-
     get_invited_users_in_group = _create_rerouter("get_invited_users_in_group")
-
-    add_room_to_group = _create_rerouter("add_room_to_group")
-    update_room_in_group = _create_rerouter("update_room_in_group")
-    remove_room_from_group = _create_rerouter("remove_room_from_group")
-
-    update_group_summary_room = _create_rerouter("update_group_summary_room")
-    delete_group_summary_room = _create_rerouter("delete_group_summary_room")
-
-    update_group_category = _create_rerouter("update_group_category")
-    delete_group_category = _create_rerouter("delete_group_category")
     get_group_category = _create_rerouter("get_group_category")
     get_group_categories = _create_rerouter("get_group_categories")
-
-    update_group_summary_user = _create_rerouter("update_group_summary_user")
-    delete_group_summary_user = _create_rerouter("delete_group_summary_user")
-
-    update_group_role = _create_rerouter("update_group_role")
-    delete_group_role = _create_rerouter("delete_group_role")
     get_group_role = _create_rerouter("get_group_role")
     get_group_roles = _create_rerouter("get_group_roles")
 
-    set_group_join_policy = _create_rerouter("set_group_join_policy")
-
     @defer.inlineCallbacks
     def get_group_summary(self, group_id, requester_user_id):
         """Get the group summary for a group.
@@ -169,6 +146,144 @@ def get_group_summary(self, group_id, requester_user_id):
 
         return res
 
+    @defer.inlineCallbacks
+    def get_users_in_group(self, group_id, requester_user_id):
+        """Get users in a group
+        """
+        if self.is_mine_id(group_id):
+            res = yield self.groups_server_handler.get_users_in_group(
+                group_id, requester_user_id
+            )
+            return res
+
+        group_server_name = get_domain_from_id(group_id)
+
+        try:
+            res = yield self.transport_client.get_users_in_group(
+                get_domain_from_id(group_id), group_id, requester_user_id
+            )
+        except HttpResponseException as e:
+            raise e.to_synapse_error()
+        except RequestSendFailed:
+            raise SynapseError(502, "Failed to contact group server")
+
+        chunk = res["chunk"]
+        valid_entries = []
+        for entry in chunk:
+            g_user_id = entry["user_id"]
+            attestation = entry.pop("attestation", {})
+            try:
+                if get_domain_from_id(g_user_id) != group_server_name:
+                    yield self.attestations.verify_attestation(
+                        attestation,
+                        group_id=group_id,
+                        user_id=g_user_id,
+                        server_name=get_domain_from_id(g_user_id),
+                    )
+                valid_entries.append(entry)
+            except Exception as e:
+                logger.info("Failed to verify user is in group: %s", e)
+
+        res["chunk"] = valid_entries
+
+        return res
+
+    @defer.inlineCallbacks
+    def get_joined_groups(self, user_id):
+        group_ids = yield self.store.get_joined_groups(user_id)
+        return {"groups": group_ids}
+
+    @defer.inlineCallbacks
+    def get_publicised_groups_for_user(self, user_id):
+        if self.hs.is_mine_id(user_id):
+            result = yield self.store.get_publicised_groups_for_user(user_id)
+
+            # Check AS associated groups for this user - this depends on the
+            # RegExps in the AS registration file (under `users`)
+            for app_service in self.store.get_app_services():
+                result.extend(app_service.get_groups_for_user(user_id))
+
+            return {"groups": result}
+        else:
+            try:
+                bulk_result = yield self.transport_client.bulk_get_publicised_groups(
+                    get_domain_from_id(user_id), [user_id]
+                )
+            except HttpResponseException as e:
+                raise e.to_synapse_error()
+            except RequestSendFailed:
+                raise SynapseError(502, "Failed to contact group server")
+
+            result = bulk_result.get("users", {}).get(user_id)
+            # TODO: Verify attestations
+            return {"groups": result}
+
+    @defer.inlineCallbacks
+    def bulk_get_publicised_groups(self, user_ids, proxy=True):
+        destinations = {}
+        local_users = set()
+
+        for user_id in user_ids:
+            if self.hs.is_mine_id(user_id):
+                local_users.add(user_id)
+            else:
+                destinations.setdefault(get_domain_from_id(user_id), set()).add(user_id)
+
+        if not proxy and destinations:
+            raise SynapseError(400, "Some user_ids are not local")
+
+        results = {}
+        failed_results = []
+        for destination, dest_user_ids in iteritems(destinations):
+            try:
+                r = yield self.transport_client.bulk_get_publicised_groups(
+                    destination, list(dest_user_ids)
+                )
+                results.update(r["users"])
+            except Exception:
+                failed_results.extend(dest_user_ids)
+
+        for uid in local_users:
+            results[uid] = yield self.store.get_publicised_groups_for_user(uid)
+
+            # Check AS associated groups for this user - this depends on the
+            # RegExps in the AS registration file (under `users`)
+            for app_service in self.store.get_app_services():
+                results[uid].extend(app_service.get_groups_for_user(uid))
+
+        return {"users": results}
+
+
+class GroupsLocalHandler(GroupsLocalWorkerHandler):
+    def __init__(self, hs):
+        super(GroupsLocalHandler, self).__init__(hs)
+
+        # Ensure attestations get renewed
+        hs.get_groups_attestation_renewer()
+
+    # The following functions merely route the query to the local groups server
+    # or federation depending on if the group is local or remote
+
+    update_group_profile = _create_rerouter("update_group_profile")
+
+    add_room_to_group = _create_rerouter("add_room_to_group")
+    update_room_in_group = _create_rerouter("update_room_in_group")
+    remove_room_from_group = _create_rerouter("remove_room_from_group")
+
+    update_group_summary_room = _create_rerouter("update_group_summary_room")
+    delete_group_summary_room = _create_rerouter("delete_group_summary_room")
+
+    update_group_category = _create_rerouter("update_group_category")
+    delete_group_category = _create_rerouter("delete_group_category")
+
+    update_group_summary_user = _create_rerouter("update_group_summary_user")
+    delete_group_summary_user = _create_rerouter("delete_group_summary_user")
+
+    update_group_role = _create_rerouter("update_group_role")
+    delete_group_role = _create_rerouter("delete_group_role")
+
+    set_group_join_policy = _create_rerouter("set_group_join_policy")
+
     @defer.inlineCallbacks
     def create_group(self, group_id, user_id, content):
         """Create a group
@@ -219,48 +334,6 @@ def create_group(self, group_id, user_id, content):
 
         return res
 
-    @defer.inlineCallbacks
-    def get_users_in_group(self, group_id, requester_user_id):
-        """Get users in a group
-        """
-        if self.is_mine_id(group_id):
-            res = yield self.groups_server_handler.get_users_in_group(
-                group_id, requester_user_id
-            )
-            return res
-
-        group_server_name = get_domain_from_id(group_id)
-
-        try:
-            res = yield self.transport_client.get_users_in_group(
-                get_domain_from_id(group_id), group_id, requester_user_id
-            )
-        except HttpResponseException as e:
-            raise e.to_synapse_error()
-        except RequestSendFailed:
-            raise SynapseError(502, "Failed to contact group server")
-
-        chunk = res["chunk"]
-        valid_entries = []
-        for entry in chunk:
-            g_user_id = entry["user_id"]
-            attestation = entry.pop("attestation", {})
-            try:
-                if get_domain_from_id(g_user_id) != group_server_name:
-                    yield self.attestations.verify_attestation(
-                        attestation,
-                        group_id=group_id,
-                        user_id=g_user_id,
-                        server_name=get_domain_from_id(g_user_id),
-                    )
-                valid_entries.append(entry)
-            except Exception as e:
-                logger.info("Failed to verify user is in group: %s", e)
-
-        res["chunk"] = valid_entries
-
-        return res
-
     @defer.inlineCallbacks
     def join_group(self, group_id, user_id, content):
         """Request to join a group
@@ -452,68 +525,3 @@ def user_removed_from_group(self, group_id, user_id, content):
             group_id, user_id, membership="leave"
         )
         self.notifier.on_new_event("groups_key", token, users=[user_id])
-
-    @defer.inlineCallbacks
-    def get_joined_groups(self, user_id):
-        group_ids = yield self.store.get_joined_groups(user_id)
-        return {"groups": group_ids}
-
-    @defer.inlineCallbacks
-    def get_publicised_groups_for_user(self, user_id):
-        if self.hs.is_mine_id(user_id):
-            result = yield self.store.get_publicised_groups_for_user(user_id)
-
-            # Check AS associated groups for this user - this depends on the
-            # RegExps in the AS registration file (under `users`)
-            for app_service in self.store.get_app_services():
-                result.extend(app_service.get_groups_for_user(user_id))
-
-            return {"groups": result}
-        else:
-            try:
-                bulk_result = yield self.transport_client.bulk_get_publicised_groups(
-                    get_domain_from_id(user_id), [user_id]
-                )
-            except HttpResponseException as e:
-                raise e.to_synapse_error()
-            except RequestSendFailed:
-                raise SynapseError(502, "Failed to contact group server")
-
-            result = bulk_result.get("users", {}).get(user_id)
-            # TODO: Verify attestations
-            return {"groups": result}
-
-    @defer.inlineCallbacks
-    def bulk_get_publicised_groups(self, user_ids, proxy=True):
-        destinations = {}
-        local_users = set()
-
-        for user_id in user_ids:
-            if self.hs.is_mine_id(user_id):
-                local_users.add(user_id)
-            else:
-                destinations.setdefault(get_domain_from_id(user_id), set()).add(user_id)
-
-        if not proxy and destinations:
-            raise SynapseError(400, "Some user_ids are not local")
-
-        results = {}
-        failed_results = []
-        for destination, dest_user_ids in iteritems(destinations):
-            try:
-                r = yield self.transport_client.bulk_get_publicised_groups(
-                    destination, list(dest_user_ids)
-                )
-                results.update(r["users"])
-            except Exception:
-                failed_results.extend(dest_user_ids)
-
-        for uid in local_users:
-            results[uid] = yield self.store.get_publicised_groups_for_user(uid)
-
-            # Check AS associated groups for this user - this depends on the
-            # RegExps in the AS registration file (under `users`)
-            for app_service in self.store.get_app_services():
-                results[uid].extend(app_service.get_groups_for_user(uid))
-
-        return {"users": results}
diff --git a/synapse/replication/slave/storage/groups.py b/synapse/replication/slave/storage/groups.py
index 69a4ae42f93..2d4fd08cf54 100644
--- a/synapse/replication/slave/storage/groups.py
+++ b/synapse/replication/slave/storage/groups.py
@@ -13,15 +13,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from synapse.storage import DataStore
+from synapse.replication.slave.storage._base import BaseSlavedStore
+from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
+from synapse.storage.data_stores.main.group_server import GroupServerWorkerStore
 from synapse.storage.database import Database
 from synapse.util.caches.stream_change_cache import StreamChangeCache
 
-from ._base import BaseSlavedStore, __func__
-from ._slaved_id_tracker import SlavedIdTracker
 
-
-class SlavedGroupServerStore(BaseSlavedStore):
+class SlavedGroupServerStore(GroupServerWorkerStore, BaseSlavedStore):
     def __init__(self, database: Database, db_conn, hs):
         super(SlavedGroupServerStore, self).__init__(database, db_conn, hs)
 
@@ -35,9 +34,8 @@ def __init__(self, database: Database, db_conn, hs):
             self._group_updates_id_gen.get_current_token(),
         )
 
-    get_groups_changes_for_user = __func__(DataStore.get_groups_changes_for_user)
-    get_group_stream_token = __func__(DataStore.get_group_stream_token)
-    get_all_groups_for_user = __func__(DataStore.get_all_groups_for_user)
+    def get_group_stream_token(self):
+        return self._group_updates_id_gen.get_current_token()
 
     def stream_positions(self):
         result = super(SlavedGroupServerStore, self).stream_positions()
diff --git a/synapse/server.py b/synapse/server.py
index 7926867b777..fd2f69e9286 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -50,7 +50,7 @@
 from synapse.federation.sender import FederationSender
 from synapse.federation.transport.client import TransportLayerClient
 from synapse.groups.attestations import GroupAttestationSigning, GroupAttestionRenewer
-from synapse.groups.groups_server import GroupsServerHandler
+from synapse.groups.groups_server import GroupsServerHandler, GroupsServerWorkerHandler
 from synapse.handlers import Handlers
 from synapse.handlers.account_validity import AccountValidityHandler
 from synapse.handlers.acme import AcmeHandler
@@ -62,7 +62,7 @@
 from synapse.handlers.e2e_keys import E2eKeysHandler
 from synapse.handlers.e2e_room_keys import E2eRoomKeysHandler
 from synapse.handlers.events import EventHandler, EventStreamHandler
-from synapse.handlers.groups_local import GroupsLocalHandler
+from synapse.handlers.groups_local import GroupsLocalHandler, GroupsLocalWorkerHandler
 from synapse.handlers.initial_sync import InitialSyncHandler
 from synapse.handlers.message import EventCreationHandler, MessageHandler
 from synapse.handlers.pagination import PaginationHandler
@@ -460,10 +460,16 @@ def build_user_directory_handler(self):
         return UserDirectoryHandler(self)
 
     def build_groups_local_handler(self):
-        return GroupsLocalHandler(self)
+        if self.config.worker_app:
+            return GroupsLocalWorkerHandler(self)
+        else:
+            return GroupsLocalHandler(self)
 
     def build_groups_server_handler(self):
-        return GroupsServerHandler(self)
+        if self.config.worker_app:
+            return GroupsServerWorkerHandler(self)
+        else:
+            return GroupsServerHandler(self)
 
     def build_groups_attestation_signing(self):
         return GroupAttestationSigning(self)
diff --git a/synapse/storage/data_stores/main/group_server.py b/synapse/storage/data_stores/main/group_server.py
index 6acd45e9f36..0963e6c250b 100644
--- a/synapse/storage/data_stores/main/group_server.py
+++ b/synapse/storage/data_stores/main/group_server.py
@@ -27,21 +27,7 @@
 _DEFAULT_ROLE_ID = ""
 
 
-class GroupServerStore(SQLBaseStore):
-    def set_group_join_policy(self, group_id, join_policy):
-        """Set the join policy of a group.
-
-        join_policy can be one of:
-         * "invite"
-         * "open"
-        """
-        return self.db.simple_update_one(
-            table="groups",
-            keyvalues={"group_id": group_id},
-            updatevalues={"join_policy": join_policy},
-            desc="set_group_join_policy",
-        )
-
+class GroupServerWorkerStore(SQLBaseStore):
     def get_group(self, group_id):
         return self.db.simple_select_one(
             table="groups",
@@ -157,6 +143,366 @@ def _get_rooms_for_summary_txn(txn):
             "get_rooms_for_summary", _get_rooms_for_summary_txn
         )
 
+    @defer.inlineCallbacks
+    def get_group_categories(self, group_id):
+        rows = yield self.db.simple_select_list(
+            table="group_room_categories",
+            keyvalues={"group_id": group_id},
+            retcols=("category_id", "is_public", "profile"),
+            desc="get_group_categories",
+        )
+
+        return {
+            row["category_id"]: {
+                "is_public": row["is_public"],
+                "profile": json.loads(row["profile"]),
+            }
+            for row in rows
+        }
+
+    @defer.inlineCallbacks
+    def get_group_category(self, group_id, category_id):
+        category = yield self.db.simple_select_one(
+            table="group_room_categories",
+            keyvalues={"group_id": group_id, "category_id": category_id},
+            retcols=("is_public", "profile"),
+            desc="get_group_category",
+        )
+
+        category["profile"] = json.loads(category["profile"])
+
+        return category
+
+    @defer.inlineCallbacks
+    def get_group_roles(self, group_id):
+        rows = yield self.db.simple_select_list(
+            table="group_roles",
+            keyvalues={"group_id": group_id},
+            retcols=("role_id", "is_public", "profile"),
+            desc="get_group_roles",
+        )
+
+        return {
+            row["role_id"]: {
+                "is_public": row["is_public"],
+                "profile": json.loads(row["profile"]),
+            }
+            for row in rows
+        }
+
+    @defer.inlineCallbacks
+    def get_group_role(self, group_id, role_id):
+        role = yield self.db.simple_select_one(
+            table="group_roles",
+            keyvalues={"group_id": group_id, "role_id": role_id},
+            retcols=("is_public", "profile"),
+            desc="get_group_role",
+        )
+
+        role["profile"] = json.loads(role["profile"])
+
+        return role
+
+    def get_local_groups_for_room(self, room_id):
+        """Get all of the local group that contain a given room
+        Args:
+            room_id (str): The ID of a room
+        Returns:
+            Deferred[list[str]]: A twisted.Deferred containing a list of group ids
+                containing this room
+        """
+        return self.db.simple_select_onecol(
+            table="group_rooms",
+            keyvalues={"room_id": room_id},
+            retcol="group_id",
+            desc="get_local_groups_for_room",
+        )
+
+    def get_users_for_summary_by_role(self, group_id, include_private=False):
+        """Get the users and roles that should be included in a summary request
+
+        Returns ([users], [roles])
+        """
+
+        def _get_users_for_summary_txn(txn):
+            keyvalues = {"group_id": group_id}
+            if not include_private:
+                keyvalues["is_public"] = True
+
+            sql = """
+                SELECT user_id, is_public, role_id, user_order
+                FROM group_summary_users
+                WHERE group_id = ?
+            """
+
+            if not include_private:
+                sql += " AND is_public = ?"
+                txn.execute(sql, (group_id, True))
+            else:
+                txn.execute(sql, (group_id,))
+
+            users = [
+                {
+                    "user_id": row[0],
+                    "is_public": row[1],
+                    "role_id": row[2] if row[2] != _DEFAULT_ROLE_ID else None,
+                    "order": row[3],
+                }
+                for row in txn
+            ]
+
+            sql = """
+                SELECT role_id, is_public, profile, role_order
+                FROM group_summary_roles
+                INNER JOIN group_roles USING (group_id, role_id)
+                WHERE group_id = ?
+            """
+
+            if not include_private:
+                sql += " AND is_public = ?"
+                txn.execute(sql, (group_id, True))
+            else:
+                txn.execute(sql, (group_id,))
+
+            roles = {
+                row[0]: {
+                    "is_public": row[1],
+                    "profile": json.loads(row[2]),
+                    "order": row[3],
+                }
+                for row in txn
+            }
+
+            return users, roles
+
+        return self.db.runInteraction(
+            "get_users_for_summary_by_role", _get_users_for_summary_txn
+        )
+
+    def is_user_in_group(self, user_id, group_id):
+        return self.db.simple_select_one_onecol(
+            table="group_users",
+            keyvalues={"group_id": group_id, "user_id": user_id},
+            retcol="user_id",
+            allow_none=True,
+            desc="is_user_in_group",
+        ).addCallback(lambda r: bool(r))
+
+    def is_user_admin_in_group(self, group_id, user_id):
+        return self.db.simple_select_one_onecol(
+            table="group_users",
+            keyvalues={"group_id": group_id, "user_id": user_id},
+            retcol="is_admin",
+            allow_none=True,
+            desc="is_user_admin_in_group",
+        )
+
+    def is_user_invited_to_local_group(self, group_id, user_id):
+        """Has the group server invited a user?
+        """
+        return self.db.simple_select_one_onecol(
+            table="group_invites",
+            keyvalues={"group_id": group_id, "user_id": user_id},
+            retcol="user_id",
+            desc="is_user_invited_to_local_group",
+            allow_none=True,
+        )
+
+    def get_users_membership_info_in_group(self, group_id, user_id):
+        """Get a dict describing the membership of a user in a group.
+
+        Example if joined:
+
+            {
+                "membership": "join",
+                "is_public": True,
+                "is_privileged": False,
+            }
+
+        Returns an empty dict if the user is not join/invite/etc
+        """
+
+        def _get_users_membership_in_group_txn(txn):
+            row = self.db.simple_select_one_txn(
+                txn,
+                table="group_users",
+                keyvalues={"group_id": group_id, "user_id": user_id},
+                retcols=("is_admin", "is_public"),
+                allow_none=True,
+            )
+
+            if row:
+                return {
+                    "membership": "join",
+                    "is_public": row["is_public"],
+                    "is_privileged": row["is_admin"],
+                }
+
+            row = self.db.simple_select_one_onecol_txn(
+                txn,
+                table="group_invites",
+                keyvalues={"group_id": group_id, "user_id": user_id},
+                retcol="user_id",
+                allow_none=True,
+            )
+
+            if row:
+                return {"membership": "invite"}
+
+            return {}
+
+        return self.db.runInteraction(
+            "get_users_membership_info_in_group", _get_users_membership_in_group_txn
+        )
+
+    def get_publicised_groups_for_user(self, user_id):
+        """Get all groups a user is publicising
+        """
+        return self.db.simple_select_onecol(
+            table="local_group_membership",
+            keyvalues={"user_id": user_id, "membership": "join", "is_publicised": True},
+            retcol="group_id",
+            desc="get_publicised_groups_for_user",
+        )
+
+    def get_attestations_need_renewals(self, valid_until_ms):
+        """Get all attestations that need to be renewed until givent time
+        """
+
+        def _get_attestations_need_renewals_txn(txn):
+            sql = """
+                SELECT group_id, user_id FROM group_attestations_renewals
+                WHERE valid_until_ms <= ?
+            """
+            txn.execute(sql, (valid_until_ms,))
+            return self.db.cursor_to_dict(txn)
+
+        return self.db.runInteraction(
+            "get_attestations_need_renewals", _get_attestations_need_renewals_txn
+        )
+
+    @defer.inlineCallbacks
+    def get_remote_attestation(self, group_id, user_id):
+        """Get the attestation that proves the remote agrees that the user is
+        in the group.
+        """
+        row = yield self.db.simple_select_one(
+            table="group_attestations_remote",
+            keyvalues={"group_id": group_id, "user_id": user_id},
+            retcols=("valid_until_ms", "attestation_json"),
+            desc="get_remote_attestation",
+            allow_none=True,
+        )
+
+        now = int(self._clock.time_msec())
+        if row and now < row["valid_until_ms"]:
+            return json.loads(row["attestation_json"])
+
+        return None
+
+    def get_joined_groups(self, user_id):
+        return self.db.simple_select_onecol(
+            table="local_group_membership",
+            keyvalues={"user_id": user_id, "membership": "join"},
+            retcol="group_id",
+            desc="get_joined_groups",
+        )
+
+    def get_all_groups_for_user(self, user_id, now_token):
+        def _get_all_groups_for_user_txn(txn):
+            sql = """
+                SELECT group_id, type, membership, u.content
+                FROM local_group_updates AS u
+                INNER JOIN local_group_membership USING (group_id, user_id)
+                WHERE user_id = ? AND membership != 'leave'
+                    AND stream_id <= ?
+            """
+            txn.execute(sql, (user_id, now_token))
+            return [
+                {
+                    "group_id": row[0],
+                    "type": row[1],
+                    "membership": row[2],
+                    "content": json.loads(row[3]),
+                }
+                for row in txn
+            ]
+
+        return self.db.runInteraction(
+            "get_all_groups_for_user", _get_all_groups_for_user_txn
+        )
+
+    def get_groups_changes_for_user(self, user_id, from_token, to_token):
+        from_token = int(from_token)
+        has_changed = self._group_updates_stream_cache.has_entity_changed(
+            user_id, from_token
+        )
+        if not has_changed:
+            return defer.succeed([])
+
+        def _get_groups_changes_for_user_txn(txn):
+            sql = """
+                SELECT group_id, membership, type, u.content
+                FROM local_group_updates AS u
+                INNER JOIN local_group_membership USING (group_id, user_id)
+                WHERE user_id = ? AND ? < stream_id AND stream_id <= ?
+            """
+            txn.execute(sql, (user_id, from_token, to_token))
+            return [
+                {
+                    "group_id": group_id,
+                    "membership": membership,
+                    "type": gtype,
+                    "content": json.loads(content_json),
+                }
+                for group_id, membership, gtype, content_json in txn
+            ]
+
+        return self.db.runInteraction(
+            "get_groups_changes_for_user", _get_groups_changes_for_user_txn
+        )
+
+    def get_all_groups_changes(self, from_token, to_token, limit):
+        from_token = int(from_token)
+        has_changed = self._group_updates_stream_cache.has_any_entity_changed(
+            from_token
+        )
+        if not has_changed:
+            return defer.succeed([])
+
+        def _get_all_groups_changes_txn(txn):
+            sql = """
+                SELECT stream_id, group_id, user_id, type, content
+                FROM local_group_updates
+                WHERE ? < stream_id AND stream_id <= ?
+                LIMIT ?
+            """
+            txn.execute(sql, (from_token, to_token, limit))
+            return [
+                (stream_id, group_id, user_id, gtype, json.loads(content_json))
+                for stream_id, group_id, user_id, gtype, content_json in txn
+            ]
+
+        return self.db.runInteraction(
+            "get_all_groups_changes", _get_all_groups_changes_txn
+        )
+
+
+class GroupServerStore(GroupServerWorkerStore):
+    def set_group_join_policy(self, group_id, join_policy):
+        """Set the join policy of a group.
+
+        join_policy can be one of:
+         * "invite"
+         * "open"
+        """
+        return self.db.simple_update_one(
+            table="groups",
+            keyvalues={"group_id": group_id},
+            updatevalues={"join_policy": join_policy},
+            desc="set_group_join_policy",
+        )
+
     def add_room_to_summary(self, group_id, room_id, category_id, order, is_public):
         return self.db.runInteraction(
             "add_room_to_summary",
@@ -299,36 +645,6 @@ def remove_room_from_summary(self, group_id, room_id, category_id):
             desc="remove_room_from_summary",
         )
 
-    @defer.inlineCallbacks
-    def get_group_categories(self, group_id):
-        rows = yield self.db.simple_select_list(
-            table="group_room_categories",
-            keyvalues={"group_id": group_id},
-            retcols=("category_id", "is_public", "profile"),
-            desc="get_group_categories",
-        )
-
-        return {
-            row["category_id"]: {
-                "is_public": row["is_public"],
-                "profile": json.loads(row["profile"]),
-            }
-            for row in rows
-        }
-
-    @defer.inlineCallbacks
-    def get_group_category(self, group_id, category_id):
-        category = yield self.db.simple_select_one(
-            table="group_room_categories",
-            keyvalues={"group_id": group_id, "category_id": category_id},
-            retcols=("is_public", "profile"),
-            desc="get_group_category",
-        )
-
-        category["profile"] = json.loads(category["profile"])
-
-        return category
-
     def upsert_group_category(self, group_id, category_id, profile, is_public):
         """Add/update room category for group
         """
@@ -360,36 +676,6 @@ def remove_group_category(self, group_id, category_id):
             desc="remove_group_category",
         )
 
-    @defer.inlineCallbacks
-    def get_group_roles(self, group_id):
-        rows = yield self.db.simple_select_list(
-            table="group_roles",
-            keyvalues={"group_id": group_id},
-            retcols=("role_id", "is_public", "profile"),
-            desc="get_group_roles",
-        )
-
-        return {
-            row["role_id"]: {
-                "is_public": row["is_public"],
-                "profile": json.loads(row["profile"]),
-            }
-            for row in rows
-        }
-
-    @defer.inlineCallbacks
-    def get_group_role(self, group_id, role_id):
-        role = yield self.db.simple_select_one(
-            table="group_roles",
-            keyvalues={"group_id": group_id, "role_id": role_id},
-            retcols=("is_public", "profile"),
-            desc="get_group_role",
-        )
-
-        role["profile"] = json.loads(role["profile"])
-
-        return role
-
     def upsert_group_role(self, group_id, role_id, profile, is_public):
         """Add/remove user role
         """
@@ -469,251 +755,99 @@ def _add_user_to_summary_txn(
             if not role_exists:
                 raise SynapseError(400, "Role doesn't exist")
 
-            # TODO: Check role is part of the summary already
-            role_exists = self.db.simple_select_one_onecol_txn(
-                txn,
-                table="group_summary_roles",
-                keyvalues={"group_id": group_id, "role_id": role_id},
-                retcol="group_id",
-                allow_none=True,
-            )
-            if not role_exists:
-                # If not, add it with an order larger than all others
-                txn.execute(
-                    """
-                    INSERT INTO group_summary_roles
-                    (group_id, role_id, role_order)
-                    SELECT ?, ?, COALESCE(MAX(role_order), 0) + 1
-                    FROM group_summary_roles
-                    WHERE group_id = ? AND role_id = ?
-                """,
-                    (group_id, role_id, group_id, role_id),
-                )
-
-        existing = self.db.simple_select_one_txn(
-            txn,
-            table="group_summary_users",
-            keyvalues={"group_id": group_id, "user_id": user_id, "role_id": role_id},
-            retcols=("user_order", "is_public"),
-            allow_none=True,
-        )
-
-        if order is not None:
-            # Shuffle other users orders that come after the given order
-            sql = """
-                UPDATE group_summary_users SET user_order = user_order + 1
-                WHERE group_id = ? AND role_id = ? AND user_order >= ?
-            """
-            txn.execute(sql, (group_id, role_id, order))
-        elif not existing:
-            sql = """
-                SELECT COALESCE(MAX(user_order), 0) + 1 FROM group_summary_users
-                WHERE group_id = ? AND role_id = ?
-            """
-            txn.execute(sql, (group_id, role_id))
-            (order,) = txn.fetchone()
-
-        if existing:
-            to_update = {}
-            if order is not None:
-                to_update["user_order"] = order
-            if is_public is not None:
-                to_update["is_public"] = is_public
-            self.db.simple_update_txn(
-                txn,
-                table="group_summary_users",
-                keyvalues={
-                    "group_id": group_id,
-                    "role_id": role_id,
-                    "user_id": user_id,
-                },
-                values=to_update,
-            )
-        else:
-            if is_public is None:
-                is_public = True
-
-            self.db.simple_insert_txn(
-                txn,
-                table="group_summary_users",
-                values={
-                    "group_id": group_id,
-                    "role_id": role_id,
-                    "user_id": user_id,
-                    "user_order": order,
-                    "is_public": is_public,
-                },
-            )
-
-    def remove_user_from_summary(self, group_id, user_id, role_id):
-        if role_id is None:
-            role_id = _DEFAULT_ROLE_ID
-
-        return self.db.simple_delete(
-            table="group_summary_users",
-            keyvalues={"group_id": group_id, "role_id": role_id, "user_id": user_id},
-            desc="remove_user_from_summary",
-        )
-
-    def get_local_groups_for_room(self, room_id):
-        """Get all of the local group that contain a given room
-        Args:
-            room_id (str): The ID of a room
-        Returns:
-            Deferred[list[str]]: A twisted.Deferred containing a list of group ids
-                containing this room
-        """
-        return self.db.simple_select_onecol(
-            table="group_rooms",
-            keyvalues={"room_id": room_id},
-            retcol="group_id",
-            desc="get_local_groups_for_room",
-        )
-
-    def get_users_for_summary_by_role(self, group_id, include_private=False):
-        """Get the users and roles that should be included in a summary request
-
-        Returns ([users], [roles])
-        """
-
-        def _get_users_for_summary_txn(txn):
-            keyvalues = {"group_id": group_id}
-            if not include_private:
-                keyvalues["is_public"] = True
-
-            sql = """
-                SELECT user_id, is_public, role_id, user_order
-                FROM group_summary_users
-                WHERE group_id = ?
-            """
-
-            if not include_private:
-                sql += " AND is_public = ?"
-                txn.execute(sql, (group_id, True))
-            else:
-                txn.execute(sql, (group_id,))
-
-            users = [
-                {
-                    "user_id": row[0],
-                    "is_public": row[1],
-                    "role_id": row[2] if row[2] != _DEFAULT_ROLE_ID else None,
-                    "order": row[3],
-                }
-                for row in txn
-            ]
-
-            sql = """
-                SELECT role_id, is_public, profile, role_order
-                FROM group_summary_roles
-                INNER JOIN group_roles USING (group_id, role_id)
-                WHERE group_id = ?
-            """
-
-            if not include_private:
-                sql += " AND is_public = ?"
-                txn.execute(sql, (group_id, True))
-            else:
-                txn.execute(sql, (group_id,))
-
-            roles = {
-                row[0]: {
-                    "is_public": row[1],
-                    "profile": json.loads(row[2]),
-                    "order": row[3],
-                }
-                for row in txn
-            }
-
-            return users, roles
-
-        return self.db.runInteraction(
-            "get_users_for_summary_by_role", _get_users_for_summary_txn
-        )
-
-    def is_user_in_group(self, user_id, group_id):
-        return self.db.simple_select_one_onecol(
-            table="group_users",
-            keyvalues={"group_id": group_id, "user_id": user_id},
-            retcol="user_id",
-            allow_none=True,
-            desc="is_user_in_group",
-        ).addCallback(lambda r: bool(r))
-
-    def is_user_admin_in_group(self, group_id, user_id):
-        return self.db.simple_select_one_onecol(
-            table="group_users",
-            keyvalues={"group_id": group_id, "user_id": user_id},
-            retcol="is_admin",
-            allow_none=True,
-            desc="is_user_admin_in_group",
-        )
-
-    def add_group_invite(self, group_id, user_id):
-        """Record that the group server has invited a user
-        """
-        return self.db.simple_insert(
-            table="group_invites",
-            values={"group_id": group_id, "user_id": user_id},
-            desc="add_group_invite",
-        )
+            # TODO: Check role is part of the summary already
+            role_exists = self.db.simple_select_one_onecol_txn(
+                txn,
+                table="group_summary_roles",
+                keyvalues={"group_id": group_id, "role_id": role_id},
+                retcol="group_id",
+                allow_none=True,
+            )
+            if not role_exists:
+                # If not, add it with an order larger than all others
+                txn.execute(
+                    """
+                    INSERT INTO group_summary_roles
+                    (group_id, role_id, role_order)
+                    SELECT ?, ?, COALESCE(MAX(role_order), 0) + 1
+                    FROM group_summary_roles
+                    WHERE group_id = ? AND role_id = ?
+                """,
+                    (group_id, role_id, group_id, role_id),
+                )
 
-    def is_user_invited_to_local_group(self, group_id, user_id):
-        """Has the group server invited a user?
-        """
-        return self.db.simple_select_one_onecol(
-            table="group_invites",
-            keyvalues={"group_id": group_id, "user_id": user_id},
-            retcol="user_id",
-            desc="is_user_invited_to_local_group",
+        existing = self.db.simple_select_one_txn(
+            txn,
+            table="group_summary_users",
+            keyvalues={"group_id": group_id, "user_id": user_id, "role_id": role_id},
+            retcols=("user_order", "is_public"),
             allow_none=True,
         )
 
-    def get_users_membership_info_in_group(self, group_id, user_id):
-        """Get a dict describing the membership of a user in a group.
-
-        Example if joined:
-
-            {
-                "membership": "join",
-                "is_public": True,
-                "is_privileged": False,
-            }
-
-        Returns an empty dict if the user is not join/invite/etc
-        """
+        if order is not None:
+            # Shuffle other users orders that come after the given order
+            sql = """
+                UPDATE group_summary_users SET user_order = user_order + 1
+                WHERE group_id = ? AND role_id = ? AND user_order >= ?
+            """
+            txn.execute(sql, (group_id, role_id, order))
+        elif not existing:
+            sql = """
+                SELECT COALESCE(MAX(user_order), 0) + 1 FROM group_summary_users
+                WHERE group_id = ? AND role_id = ?
+            """
+            txn.execute(sql, (group_id, role_id))
+            (order,) = txn.fetchone()
 
-        def _get_users_membership_in_group_txn(txn):
-            row = self.db.simple_select_one_txn(
+        if existing:
+            to_update = {}
+            if order is not None:
+                to_update["user_order"] = order
+            if is_public is not None:
+                to_update["is_public"] = is_public
+            self.db.simple_update_txn(
                 txn,
-                table="group_users",
-                keyvalues={"group_id": group_id, "user_id": user_id},
-                retcols=("is_admin", "is_public"),
-                allow_none=True,
+                table="group_summary_users",
+                keyvalues={
+                    "group_id": group_id,
+                    "role_id": role_id,
+                    "user_id": user_id,
+                },
+                values=to_update,
             )
+        else:
+            if is_public is None:
+                is_public = True
 
-            if row:
-                return {
-                    "membership": "join",
-                    "is_public": row["is_public"],
-                    "is_privileged": row["is_admin"],
-                }
-
-            row = self.db.simple_select_one_onecol_txn(
+            self.db.simple_insert_txn(
                 txn,
-                table="group_invites",
-                keyvalues={"group_id": group_id, "user_id": user_id},
-                retcol="user_id",
-                allow_none=True,
+                table="group_summary_users",
+                values={
+                    "group_id": group_id,
+                    "role_id": role_id,
+                    "user_id": user_id,
+                    "user_order": order,
+                    "is_public": is_public,
+                },
             )
 
-            if row:
-                return {"membership": "invite"}
+    def remove_user_from_summary(self, group_id, user_id, role_id):
+        if role_id is None:
+            role_id = _DEFAULT_ROLE_ID
 
-            return {}
+        return self.db.simple_delete(
+            table="group_summary_users",
+            keyvalues={"group_id": group_id, "role_id": role_id, "user_id": user_id},
+            desc="remove_user_from_summary",
+        )
 
-        return self.db.runInteraction(
-            "get_users_membership_info_in_group", _get_users_membership_in_group_txn
+    def add_group_invite(self, group_id, user_id):
+        """Record that the group server has invited a user
+        """
+        return self.db.simple_insert(
+            table="group_invites",
+            values={"group_id": group_id, "user_id": user_id},
+            desc="add_group_invite",
         )
 
     def add_user_to_group(
@@ -846,16 +980,6 @@ def _remove_room_from_group_txn(txn):
             "remove_room_from_group", _remove_room_from_group_txn
         )
 
-    def get_publicised_groups_for_user(self, user_id):
-        """Get all groups a user is publicising
-        """
-        return self.db.simple_select_onecol(
-            table="local_group_membership",
-            keyvalues={"user_id": user_id, "membership": "join", "is_publicised": True},
-            retcol="group_id",
-            desc="get_publicised_groups_for_user",
-        )
-
     def update_group_publicity(self, group_id, user_id, publicise):
         """Update whether the user is publicising their membership of the group
         """
@@ -1000,22 +1124,6 @@ def update_group_profile(self, group_id, profile):
             desc="update_group_profile",
         )
 
-    def get_attestations_need_renewals(self, valid_until_ms):
-        """Get all attestations that need to be renewed until givent time
-        """
-
-        def _get_attestations_need_renewals_txn(txn):
-            sql = """
-                SELECT group_id, user_id FROM group_attestations_renewals
-                WHERE valid_until_ms <= ?
-            """
-            txn.execute(sql, (valid_until_ms,))
-            return self.db.cursor_to_dict(txn)
-
-        return self.db.runInteraction(
-            "get_attestations_need_renewals", _get_attestations_need_renewals_txn
-        )
-
     def update_attestation_renewal(self, group_id, user_id, attestation):
         """Update an attestation that we have renewed
         """
@@ -1054,112 +1162,6 @@ def remove_attestation_renewal(self, group_id, user_id):
             desc="remove_attestation_renewal",
         )
 
-    @defer.inlineCallbacks
-    def get_remote_attestation(self, group_id, user_id):
-        """Get the attestation that proves the remote agrees that the user is
-        in the group.
-        """
-        row = yield self.db.simple_select_one(
-            table="group_attestations_remote",
-            keyvalues={"group_id": group_id, "user_id": user_id},
-            retcols=("valid_until_ms", "attestation_json"),
-            desc="get_remote_attestation",
-            allow_none=True,
-        )
-
-        now = int(self._clock.time_msec())
-        if row and now < row["valid_until_ms"]:
-            return json.loads(row["attestation_json"])
-
-        return None
-
-    def get_joined_groups(self, user_id):
-        return self.db.simple_select_onecol(
-            table="local_group_membership",
-            keyvalues={"user_id": user_id, "membership": "join"},
-            retcol="group_id",
-            desc="get_joined_groups",
-        )
-
-    def get_all_groups_for_user(self, user_id, now_token):
-        def _get_all_groups_for_user_txn(txn):
-            sql = """
-                SELECT group_id, type, membership, u.content
-                FROM local_group_updates AS u
-                INNER JOIN local_group_membership USING (group_id, user_id)
-                WHERE user_id = ? AND membership != 'leave'
-                    AND stream_id <= ?
-            """
-            txn.execute(sql, (user_id, now_token))
-            return [
-                {
-                    "group_id": row[0],
-                    "type": row[1],
-                    "membership": row[2],
-                    "content": json.loads(row[3]),
-                }
-                for row in txn
-            ]
-
-        return self.db.runInteraction(
-            "get_all_groups_for_user", _get_all_groups_for_user_txn
-        )
-
-    def get_groups_changes_for_user(self, user_id, from_token, to_token):
-        from_token = int(from_token)
-        has_changed = self._group_updates_stream_cache.has_entity_changed(
-            user_id, from_token
-        )
-        if not has_changed:
-            return defer.succeed([])
-
-        def _get_groups_changes_for_user_txn(txn):
-            sql = """
-                SELECT group_id, membership, type, u.content
-                FROM local_group_updates AS u
-                INNER JOIN local_group_membership USING (group_id, user_id)
-                WHERE user_id = ? AND ? < stream_id AND stream_id <= ?
-            """
-            txn.execute(sql, (user_id, from_token, to_token))
-            return [
-                {
-                    "group_id": group_id,
-                    "membership": membership,
-                    "type": gtype,
-                    "content": json.loads(content_json),
-                }
-                for group_id, membership, gtype, content_json in txn
-            ]
-
-        return self.db.runInteraction(
-            "get_groups_changes_for_user", _get_groups_changes_for_user_txn
-        )
-
-    def get_all_groups_changes(self, from_token, to_token, limit):
-        from_token = int(from_token)
-        has_changed = self._group_updates_stream_cache.has_any_entity_changed(
-            from_token
-        )
-        if not has_changed:
-            return defer.succeed([])
-
-        def _get_all_groups_changes_txn(txn):
-            sql = """
-                SELECT stream_id, group_id, user_id, type, content
-                FROM local_group_updates
-                WHERE ? < stream_id AND stream_id <= ?
-                LIMIT ?
-            """
-            txn.execute(sql, (from_token, to_token, limit))
-            return [
-                (stream_id, group_id, user_id, gtype, json.loads(content_json))
-                for stream_id, group_id, user_id, gtype, content_json in txn
-            ]
-
-        return self.db.runInteraction(
-            "get_all_groups_changes", _get_all_groups_changes_txn
-        )
-
     def get_group_stream_token(self):
         return self._group_updates_id_gen.get_current_token()
 

From 8a29def84abb8e71b1e75746ead8394fddc5843f Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 7 Feb 2020 15:45:39 +0000
Subject: [PATCH 094/278] Add support for putting fed user query API on workers
 (#6873)

---
 changelog.d/6873.feature                |  1 +
 docs/workers.md                         |  1 +
 synapse/app/federation_reader.py        |  2 ++
 synapse/federation/federation_server.py |  7 +++--
 synapse/handlers/device.py              | 35 +++++++++++--------------
 5 files changed, 25 insertions(+), 21 deletions(-)
 create mode 100644 changelog.d/6873.feature

diff --git a/changelog.d/6873.feature b/changelog.d/6873.feature
new file mode 100644
index 00000000000..bbedf8f7f03
--- /dev/null
+++ b/changelog.d/6873.feature
@@ -0,0 +1 @@
+Add ability to route federation user device queries to workers.
diff --git a/docs/workers.md b/docs/workers.md
index 82442d6a0a0..6f7ec587804 100644
--- a/docs/workers.md
+++ b/docs/workers.md
@@ -176,6 +176,7 @@ endpoints matching the following regular expressions:
     ^/_matrix/federation/v1/query_auth/
     ^/_matrix/federation/v1/event_auth/
     ^/_matrix/federation/v1/exchange_third_party_invite/
+    ^/_matrix/federation/v1/user/devices/
     ^/_matrix/federation/v1/send/
     ^/_matrix/federation/v1/get_groups_publicised$
     ^/_matrix/key/v2/query
diff --git a/synapse/app/federation_reader.py b/synapse/app/federation_reader.py
index 5e17ef1396c..d055d11b235 100644
--- a/synapse/app/federation_reader.py
+++ b/synapse/app/federation_reader.py
@@ -33,6 +33,7 @@
 from synapse.replication.slave.storage._base import BaseSlavedStore
 from synapse.replication.slave.storage.account_data import SlavedAccountDataStore
 from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore
+from synapse.replication.slave.storage.devices import SlavedDeviceStore
 from synapse.replication.slave.storage.directory import DirectoryStore
 from synapse.replication.slave.storage.events import SlavedEventStore
 from synapse.replication.slave.storage.groups import SlavedGroupServerStore
@@ -68,6 +69,7 @@ class FederationReaderSlavedStore(
     SlavedKeyStore,
     SlavedRegistrationStore,
     SlavedGroupServerStore,
+    SlavedDeviceStore,
     RoomStore,
     DirectoryStore,
     SlavedTransactionStore,
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index a4c97ed458f..b3e4db507e3 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -82,6 +82,8 @@ def __init__(self, hs):
         self.handler = hs.get_handlers().federation_handler
         self.state = hs.get_state_handler()
 
+        self.device_handler = hs.get_device_handler()
+
         self._server_linearizer = Linearizer("fed_server")
         self._transaction_linearizer = Linearizer("fed_txn_handler")
 
@@ -528,8 +530,9 @@ async def on_query_auth_request(self, origin, content, room_id, event_id):
     def on_query_client_keys(self, origin, content):
         return self.on_query_request("client_keys", content)
 
-    def on_query_user_devices(self, origin, user_id):
-        return self.on_query_request("user_devices", user_id)
+    async def on_query_user_devices(self, origin: str, user_id: str):
+        keys = await self.device_handler.on_federation_query_user_devices(user_id)
+        return 200, keys
 
     @trace
     async def on_claim_client_keys(self, origin, content):
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index a9bd431486a..6d8e48ed39c 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -225,6 +225,22 @@ def get_user_ids_changed(self, user_id, from_token):
 
         return result
 
+    @defer.inlineCallbacks
+    def on_federation_query_user_devices(self, user_id):
+        stream_id, devices = yield self.store.get_devices_with_keys_by_user(user_id)
+        master_key = yield self.store.get_e2e_cross_signing_key(user_id, "master")
+        self_signing_key = yield self.store.get_e2e_cross_signing_key(
+            user_id, "self_signing"
+        )
+
+        return {
+            "user_id": user_id,
+            "stream_id": stream_id,
+            "devices": devices,
+            "master_key": master_key,
+            "self_signing_key": self_signing_key,
+        }
+
 
 class DeviceHandler(DeviceWorkerHandler):
     def __init__(self, hs):
@@ -239,9 +255,6 @@ def __init__(self, hs):
         federation_registry.register_edu_handler(
             "m.device_list_update", self.device_list_updater.incoming_device_list_update
         )
-        federation_registry.register_query_handler(
-            "user_devices", self.on_federation_query_user_devices
-        )
 
         hs.get_distributor().observe("user_left_room", self.user_left_room)
 
@@ -456,22 +469,6 @@ def notify_user_signature_update(self, from_user_id, user_ids):
 
         self.notifier.on_new_event("device_list_key", position, users=[from_user_id])
 
-    @defer.inlineCallbacks
-    def on_federation_query_user_devices(self, user_id):
-        stream_id, devices = yield self.store.get_devices_with_keys_by_user(user_id)
-        master_key = yield self.store.get_e2e_cross_signing_key(user_id, "master")
-        self_signing_key = yield self.store.get_e2e_cross_signing_key(
-            user_id, "self_signing"
-        )
-
-        return {
-            "user_id": user_id,
-            "stream_id": stream_id,
-            "devices": devices,
-            "master_key": master_key,
-            "self_signing_key": self_signing_key,
-        }
-
     @defer.inlineCallbacks
     def user_left_room(self, user, room_id):
         user_id = user.to_string()
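
The method moved into DeviceWorkerHandler only performs reads (get_devices_with_keys_by_user and the two cross-signing key lookups), which is what makes it safe to serve from a federation_reader worker. For reference, an illustrative example of the response shape it returns; the field names come from the hunk above, the values are invented placeholders:

# Illustrative placeholder values only; the keys match the dict built by
# on_federation_query_user_devices in the hunk above.
example_user_devices_response = {
    "user_id": "@alice:example.org",  # hypothetical user
    "stream_id": 12345,               # device-list stream position
    "devices": [
        {"device_id": "ABCDEFGH", "keys": {}},  # one device, key payload elided
    ],
    "master_key": None,         # populated when cross-signing keys exist
    "self_signing_key": None,
}
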

From d840ee5bdefcd4278a65a8f38d50ad9c9cace677 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Thu, 27 Feb 2020 10:44:55 +0000
Subject: [PATCH 095/278] Revert "skip send without trailing slash"

I think this was done back when most Synapse servers would reject the
no-trailing-slash version; the workaround is no longer required, and it
makes matrix.org spec-incompliant.

This reverts commit fc5be50d561dcf0f069c5b4920faa0dfd3a962e2.
---
 synapse/federation/transport/client.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py
index 9c6f22760db..dc563538deb 100644
--- a/synapse/federation/transport/client.py
+++ b/synapse/federation/transport/client.py
@@ -152,7 +152,7 @@ def send_transaction(self, transaction, json_data_callback=None):
         # generated by the json_data_callback.
         json_data = transaction.get_dict()
 
-        path = _create_v1_path("/send/%s/", transaction.transaction_id)
+        path = _create_v1_path("/send/%s", transaction.transaction_id)
 
         response = yield self.client.put_json(
             transaction.destination,
@@ -161,7 +161,7 @@ def send_transaction(self, transaction, json_data_callback=None):
             json_data_callback=json_data_callback,
             long_retries=True,
             backoff_on_404=True,  # If we get a 404 the other side has gone
-            # try_trailing_slash_on_400=True,
+            try_trailing_slash_on_400=True,
         )
 
         return response

From 936686ed2dc5d5010cc4e58663b181c5ee0dcc5b Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Wed, 11 Mar 2020 15:21:25 +0000
Subject: [PATCH 096/278] Don't filter out events when we're checking the
 visibility of state

---
 synapse/handlers/message.py           |  2 +-
 synapse/res/templates/saml_error.html | 42 +++++++++++++++++++++++++++
 synapse/visibility.py                 | 15 +++++-----
 3 files changed, 50 insertions(+), 9 deletions(-)
 create mode 100644 synapse/res/templates/saml_error.html

diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 58faeab247b..857bb478c2f 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -160,7 +160,7 @@ def get_state_events(
                 raise NotFoundError("Can't find event for token %s" % (at_token,))
 
             visible_events = yield filter_events_for_client(
-                self.storage, user_id, last_events, apply_retention_policies=False
+                self.storage, user_id, last_events, filter_send_to_client=False
             )
 
             event = last_events[0]
diff --git a/synapse/res/templates/saml_error.html b/synapse/res/templates/saml_error.html
new file mode 100644
index 00000000000..31841c3137d
--- /dev/null
+++ b/synapse/res/templates/saml_error.html
@@ -0,0 +1,42 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <title>SSO error</title>
+</head>
+<body>
+    <p>Oops! Something went wrong during authentication<span id="errormsg"></span>.</p>
+    <p>
+        If you're seeing this page after clicking a link sent to you via email, make
+        sure you only click the confirmation link once, and that you open the
+        validation link in the same client you're logging in from.
+    </p>
+    <p>
+        Try logging in again from your Matrix client and if the problem persists
+        please contact the server's administrator.
+    </p>
+
+    <script type="text/javascript">
+        // Error handling to support Auth0 errors that we might get through a GET request
+        // to the validation endpoint. If an error is provided, it's either going to be
+        // located in the query string or in a query string-like URI fragment.
+        // We try to locate the error in either of these two locations, but if we
+        // can't, we just don't print anything specific.
+        let searchStr = "";
+        if (window.location.search) {
+            // For some reason window.location.searchParams isn't always defined when
+            // window.location.search is, so we can't just use it right away.
+            searchStr = window.location.search;
+        } else if (window.location.hash) {
+            // Otherwise look in the URI fragment, converting it to query-string form.
+            searchStr = window.location.hash.replace("#", "?");
+        }
+
+        let errorDesc = new URLSearchParams(searchStr).get("error_description")
+
+        if (errorDesc) {
+            document.getElementById("errormsg").innerHTML = ` ("${errorDesc}")`;
+        }
+    </script>
+</body>
+</html>
\ No newline at end of file
diff --git a/synapse/visibility.py b/synapse/visibility.py
index a48a4f3dfe4..1d538b206d7 100644
--- a/synapse/visibility.py
+++ b/synapse/visibility.py
@@ -49,7 +49,7 @@ def filter_events_for_client(
     events,
     is_peeking=False,
     always_include_ids=frozenset(),
-    apply_retention_policies=True,
+    filter_send_to_client=True,
 ):
     """
     Check which events a user is allowed to see. If the user can see the event but its
@@ -65,10 +65,9 @@ def filter_events_for_client(
             events
         always_include_ids (set(event_id)): set of event ids to specifically
             include (unless sender is ignored)
-        apply_retention_policies (bool): Whether to filter out events that's older than
-            allowed by the room's retention policy. Useful when this function is called
-            to e.g. check whether a user should be allowed to see the state at a given
-            event rather than to know if it should send an event to a user's client(s).
+        filter_send_to_client (bool): Whether we're checking an event that's going to be
+            sent to a client. This might not always be the case since this function can
+            also be called to check whether a user can see the state at a given point.
 
     Returns:
         Deferred[list[synapse.events.EventBase]]
@@ -96,7 +95,7 @@ def filter_events_for_client(
 
     erased_senders = yield storage.main.are_users_erased((e.sender for e in events))
 
-    if apply_retention_policies:
+    if not filter_send_to_client:
         room_ids = {e.room_id for e in events}
         retention_policies = {}
 
@@ -119,7 +118,7 @@ def allowed(event):
 
                the original event if they can see it as normal.
         """
-        if event.type == "org.matrix.dummy_event":
+        if event.type == "org.matrix.dummy_event" and filter_send_to_client:
             return None
 
         if not event.is_state() and event.sender in ignore_list:
@@ -134,7 +133,7 @@ def allowed(event):
 
         # Don't try to apply the room's retention policy if the event is a state event, as
         # MSC1763 states that retention is only considered for non-state events.
-        if apply_retention_policies and not event.is_state():
+        if filter_send_to_client and not event.is_state():
             retention_policy = retention_policies[event.room_id]
             max_lifetime = retention_policy.get("max_lifetime")
 

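Editorial note: a minimal, self-contained sketch (plain Python, not Synapse
code) of the distinction the renamed `filter_send_to_client` flag draws in the
hunks above. The basic visibility rules always apply; the send-only rules
(retention, dummy events, ignored senders) should only kick in when the events
are actually being sent to a client, not when we are merely checking whether a
user could see the state at some point. The two follow-up patches below fix
the inverted condition and extend the same treatment to ignored users.

    def sketch_filter(events, can_see, hit_by_send_only_rules, filter_send_to_client=True):
        # `can_see` and `hit_by_send_only_rules` are hypothetical predicates
        # standing in for Synapse's real visibility and retention/ignore checks.
        out = []
        for ev in events:
            if not can_see(ev):
                continue  # visibility rules always apply
            if filter_send_to_client and hit_by_send_only_rules(ev):
                continue  # send-only rules apply only on the way to a client
            out.append(ev)
        return out
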
From 6b73b8b70c7b673987897d8e3f871f90c9c15d11 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Wed, 11 Mar 2020 15:32:07 +0000
Subject: [PATCH 097/278] Fix condition

---
 synapse/visibility.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/visibility.py b/synapse/visibility.py
index 1d538b206d7..d0b2241e484 100644
--- a/synapse/visibility.py
+++ b/synapse/visibility.py
@@ -95,7 +95,7 @@ def filter_events_for_client(
 
     erased_senders = yield storage.main.are_users_erased((e.sender for e in events))
 
-    if not filter_send_to_client:
+    if filter_send_to_client:
         room_ids = {e.room_id for e in events}
         retention_policies = {}
 

From 568461b5ec3b4eebe68e6bbe21fc792c71af6a65 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Wed, 11 Mar 2020 17:04:18 +0000
Subject: [PATCH 098/278] Also don't filter out events sent by ignored users
 when checking state visibility

---
 synapse/visibility.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/visibility.py b/synapse/visibility.py
index d0b2241e484..82a21324278 100644
--- a/synapse/visibility.py
+++ b/synapse/visibility.py
@@ -121,7 +121,7 @@ def allowed(event):
         if event.type == "org.matrix.dummy_event" and filter_send_to_client:
             return None
 
-        if not event.is_state() and event.sender in ignore_list:
+        if not event.is_state() and event.sender in ignore_list and filter_send_to_client:
             return None
 
         # Until MSC2261 has landed we can't redact malicious alias events, so for

From 08d68c52960ac3c2a36e1aaab0fd02d1f7fc324a Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Thu, 12 Mar 2020 15:59:24 +0000
Subject: [PATCH 099/278] Populate the room version from state events

See `rooms_version_column_3.sql.postgres` for details about why we need to do
that.
---
 .../57/rooms_version_column_3.sql.postgres    | 38 +++++++++++++++++++
 .../57/rooms_version_column_3.sql.sqlite      | 22 +++++++++++
 2 files changed, 60 insertions(+)
 create mode 100644 synapse/storage/data_stores/main/schema/delta/57/rooms_version_column_3.sql.postgres
 create mode 100644 synapse/storage/data_stores/main/schema/delta/57/rooms_version_column_3.sql.sqlite

diff --git a/synapse/storage/data_stores/main/schema/delta/57/rooms_version_column_3.sql.postgres b/synapse/storage/data_stores/main/schema/delta/57/rooms_version_column_3.sql.postgres
new file mode 100644
index 00000000000..31be4d8e73e
--- /dev/null
+++ b/synapse/storage/data_stores/main/schema/delta/57/rooms_version_column_3.sql.postgres
@@ -0,0 +1,38 @@
+/* Copyright 2020 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- When we first added the room_version column to the rooms table, it was populated from
+-- the current_state_events table. However, there was an issue causing a background
+-- update to clean up the current_state_events table for rooms where the server is no
+-- longer participating, before that column could be populated. Therefore, some rooms had
+-- a NULL room_version.
+
+-- The rooms_version_column_2.sql.* delta files were introduced to make the populating
+-- synchronous instead of running it in a background update, which fixed this issue.
+-- However, all of the instances of Synapse installed or updated in the meantime got
+-- their rooms table corrupted with NULL room_versions.
+
+-- This query fishes out the room versions from the create event using the state_events
+-- table instead of the current_state_events one, as the former still have all of the
+-- create events.
+
+UPDATE rooms SET room_version=(
+    SELECT COALESCE(json::json->'content'->>'room_version','1')
+    FROM state_events se INNER JOIN event_json ej USING (event_id)
+    WHERE se.room_id=rooms.room_id AND se.type='m.room.create' AND se.state_key=''
+) WHERE rooms.room_version IS NULL;
+
+-- see also rooms_version_column_3.sql.sqlite which has a copy of the above query, using
+-- sqlite syntax for the json extraction.
diff --git a/synapse/storage/data_stores/main/schema/delta/57/rooms_version_column_3.sql.sqlite b/synapse/storage/data_stores/main/schema/delta/57/rooms_version_column_3.sql.sqlite
new file mode 100644
index 00000000000..d13911a64e0
--- /dev/null
+++ b/synapse/storage/data_stores/main/schema/delta/57/rooms_version_column_3.sql.sqlite
@@ -0,0 +1,22 @@
+/* Copyright 2020 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- see rooms_version_column_3.sql.postgres for details of what's going on here.
+
+UPDATE rooms SET room_version=(
+    SELECT COALESCE(json_extract(ej.json, '$.content.room_version'), '1')
+    FROM state_events se INNER JOIN event_json ej USING (event_id)
+    WHERE se.room_id=rooms.room_id AND se.type='m.room.create' AND se.state_key=''
+) WHERE rooms.room_version IS NULL;

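Editorial note: in Python terms, both deltas above extract the same thing from
the stored create event. A self-contained sketch, assuming the event_json.json
column holds the event's JSON as text:

    import json

    def room_version_from_create_event(event_json_text):
        content = json.loads(event_json_text).get("content", {})
        # Fall back to "1", matching the COALESCE(..., '1') in the SQL.
        return content.get("room_version") or "1"
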
From 03c694bb0885234618ff2762eade4a05f880f735 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Thu, 12 Mar 2020 16:48:11 +0000
Subject: [PATCH 100/278] Fix schema deltas

---
 .../main/schema/delta/57/rooms_version_column_3.sql.postgres     | 1 +
 .../main/schema/delta/57/rooms_version_column_3.sql.sqlite       | 1 +
 2 files changed, 2 insertions(+)

diff --git a/synapse/storage/data_stores/main/schema/delta/57/rooms_version_column_3.sql.postgres b/synapse/storage/data_stores/main/schema/delta/57/rooms_version_column_3.sql.postgres
index 31be4d8e73e..92aaadde0d9 100644
--- a/synapse/storage/data_stores/main/schema/delta/57/rooms_version_column_3.sql.postgres
+++ b/synapse/storage/data_stores/main/schema/delta/57/rooms_version_column_3.sql.postgres
@@ -32,6 +32,7 @@ UPDATE rooms SET room_version=(
     SELECT COALESCE(json::json->'content'->>'room_version','1')
     FROM state_events se INNER JOIN event_json ej USING (event_id)
     WHERE se.room_id=rooms.room_id AND se.type='m.room.create' AND se.state_key=''
+    LIMIT 1
 ) WHERE rooms.room_version IS NULL;
 
 -- see also rooms_version_column_3.sql.sqlite which has a copy of the above query, using
diff --git a/synapse/storage/data_stores/main/schema/delta/57/rooms_version_column_3.sql.sqlite b/synapse/storage/data_stores/main/schema/delta/57/rooms_version_column_3.sql.sqlite
index d13911a64e0..e19dab97cbf 100644
--- a/synapse/storage/data_stores/main/schema/delta/57/rooms_version_column_3.sql.sqlite
+++ b/synapse/storage/data_stores/main/schema/delta/57/rooms_version_column_3.sql.sqlite
@@ -19,4 +19,5 @@ UPDATE rooms SET room_version=(
     SELECT COALESCE(json_extract(ej.json, '$.content.room_version'), '1')
     FROM state_events se INNER JOIN event_json ej USING (event_id)
     WHERE se.room_id=rooms.room_id AND se.type='m.room.create' AND se.state_key=''
+    LIMIT 1
 ) WHERE rooms.room_version IS NULL;

From 9b8212d2561c9d26b8cac2f46a8e4c76e2cc3994 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <andrew@amorgan.xyz>
Date: Fri, 17 Apr 2020 17:36:24 +0100
Subject: [PATCH 101/278] Update changelog

---
 changelog.d/7289.bugfix | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/changelog.d/7289.bugfix b/changelog.d/7289.bugfix
index 5b4fbd77ac2..1568e1569b3 100644
--- a/changelog.d/7289.bugfix
+++ b/changelog.d/7289.bugfix
@@ -1 +1 @@
-Fix an edge-case where it was not possible to cross-sign a user which did not share a room with any user on your homeserver. The bug only affected Synapse deployments in worker mode.
+Fix a bug with cross-signing devices of users on other homeservers while in worker mode.
\ No newline at end of file

From fb3f1fb5c0192f7c4e29acdc987720a49f8f2f18 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <andrew@amorgan.xyz>
Date: Fri, 17 Apr 2020 17:36:53 +0100
Subject: [PATCH 102/278] Fix log lines, return type, tuple handling

---
 synapse/handlers/e2e_keys.py | 40 ++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py
index afc173ab2f6..aa08e5bb975 100644
--- a/synapse/handlers/e2e_keys.py
+++ b/synapse/handlers/e2e_keys.py
@@ -987,12 +987,22 @@ def _get_e2e_cross_signing_verify_key(
             SynapseError: if `user_id` is invalid
         """
         user = UserID.from_string(user_id)
+
         key_id = None
         verify_key = None
-
         key = yield self.store.get_e2e_cross_signing_key(
             user_id, key_type, from_user_id
         )
+        if key is not None:
+            try:
+                key_id, verify_key = get_verify_key_from_cross_signing_key(key)
+            except ValueError as e:
+                logger.warning(
+                    "Invalid %s key retrieved: %s - %s %s", key_type, key, type(e), e,
+                )
+                raise SynapseError(
+                    502, "Invalid %s key retrieved from database" % (key_type,)
+                )
 
         # If we couldn't find the key locally, and we're looking for keys of
         # another user then attempt to fetch the missing key from the remote
@@ -1008,7 +1018,7 @@ def _get_e2e_cross_signing_verify_key(
             # We only get "master" and "self_signing" keys from remote servers
             and key_type in ["master", "self_signing"]
         ):
-            key = yield self._retrieve_cross_signing_keys_for_remote_user(
+            key, key_id, verify_key = yield self._retrieve_cross_signing_keys_for_remote_user(
                 user, key_type
             )
 
@@ -1016,24 +1026,12 @@ def _get_e2e_cross_signing_verify_key(
             logger.debug("No %s key found for %s", key_type, user_id)
             raise NotFoundError("No %s key found for %s" % (key_type, user_id))
 
-        # If we retrieved the keys remotely, these values will already be set
-        if key_id is None or verify_key is None:
-            try:
-                key_id, verify_key = get_verify_key_from_cross_signing_key(key)
-            except ValueError as e:
-                logger.debug(
-                    "Invalid %s key retrieved: %s - %s %s", key_type, key, type(e), e,
-                )
-                raise SynapseError(
-                    502, "Invalid %s key retrieved from remote server", key_type
-                )
-
         return key, key_id, verify_key
 
     @defer.inlineCallbacks
     def _retrieve_cross_signing_keys_for_remote_user(
         self, user: UserID, desired_key_type: str,
-    ) -> Tuple[Optional[Dict], Optional[str], Optional[VerifyKey]]:
+    ):
         """Queries cross-signing keys for a remote user and saves them to the database
 
         Only the key specified by `key_type` will be returned, while all retrieved keys
@@ -1044,7 +1042,8 @@ def _retrieve_cross_signing_keys_for_remote_user(
             desired_key_type: The type of key to receive. One of "master", "self_signing"
 
         Returns:
-            A tuple of the retrieved key content, the key's ID and the matching VerifyKey.
+            Deferred[Tuple[Optional[Dict], Optional[str], Optional[VerifyKey]]]: A tuple
+            of the retrieved key content, the key's ID and the matching VerifyKey.
             If the key cannot be retrieved, all values in the tuple will instead be None.
         """
         try:
@@ -1059,7 +1058,7 @@ def _retrieve_cross_signing_keys_for_remote_user(
                 type(e),
                 e,
             )
-            return None
+            return None, None, None
 
         # Process each of the retrieved cross-signing keys
         final_key = None
@@ -1084,8 +1083,9 @@ def _retrieve_cross_signing_keys_for_remote_user(
                 # algorithm and colon, which is the device ID
                 key_id, verify_key = get_verify_key_from_cross_signing_key(key_content)
             except ValueError as e:
-                logger.debug(
-                    "Invalid %s key retrieved: %s - %s %s",
+                logger.warning(
+                    "Invalid %s key retrieved from remote %s: %s - %s %s",
+                    user.domain,
                     key_type,
                     key_content,
                     type(e),
@@ -1094,7 +1094,7 @@ def _retrieve_cross_signing_keys_for_remote_user(
                 continue
             device_ids.append(verify_key.version)
 
-            # If this is the desired key type, save it and it's ID/VerifyKey
+            # If this is the desired key type, save it and its ID/VerifyKey
             if key_type == desired_key_type:
                 final_key = key_content
                 final_verify_key = verify_key

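Editorial note: the hunks above spread the new lookup order across several
methods, so here is a condensed, self-contained sketch of it in plain Python.
The callables are hypothetical stand-ins for the storage and federation
helpers, not Synapse APIs.

    def lookup_cross_signing_key(user_id, key_type, local_lookup, remote_lookup, is_mine):
        # 1. Try the local database first.
        key = local_lookup(user_id, key_type)
        # 2. For a remote user's master/self_signing key that we don't have
        #    cached (e.g. cross-signing someone we share no rooms with yet),
        #    ask their homeserver and cache the result.
        if key is None and not is_mine(user_id) and key_type in ("master", "self_signing"):
            key = remote_lookup(user_id, key_type)
        # 3. Still nothing: report not found.
        if key is None:
            raise LookupError("No %s key found for %s" % (key_type, user_id))
        return key
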
From 2fdfa96ee63b9838e1b0e255cae531687a726aca Mon Sep 17 00:00:00 2001
From: Andrew Morgan <andrew@amorgan.xyz>
Date: Fri, 17 Apr 2020 17:38:36 +0100
Subject: [PATCH 103/278] lint

---
 synapse/handlers/e2e_keys.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py
index aa08e5bb975..f096ea8a539 100644
--- a/synapse/handlers/e2e_keys.py
+++ b/synapse/handlers/e2e_keys.py
@@ -16,7 +16,6 @@
 # limitations under the License.
 
 import logging
-from typing import Dict, Optional, Tuple
 
 from six import iteritems
 
@@ -24,7 +23,6 @@
 from canonicaljson import encode_canonical_json, json
 from signedjson.key import decode_verify_key_bytes
 from signedjson.sign import SignatureVerifyException, verify_signed_json
-from signedjson.types import VerifyKey
 from unpaddedbase64 import decode_base64
 
 from twisted.internet import defer
@@ -1018,9 +1016,11 @@ def _get_e2e_cross_signing_verify_key(
             # We only get "master" and "self_signing" keys from remote servers
             and key_type in ["master", "self_signing"]
         ):
-            key, key_id, verify_key = yield self._retrieve_cross_signing_keys_for_remote_user(
-                user, key_type
-            )
+            (
+                key,
+                key_id,
+                verify_key,
+            ) = yield self._retrieve_cross_signing_keys_for_remote_user(user, key_type)
 
         if key is None:
             logger.debug("No %s key found for %s", key_type, user_id)

From da5e6eea450a22c540f3da1709f8b23c89f39999 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Tue, 21 Apr 2020 11:00:57 +0100
Subject: [PATCH 104/278] Revert recent merges of #7289 into
 matrix-org-hotfixes

This was incorrectly merged before it was ready.

This reverts commit aead826d2dee183bc6003a22612911e1664246c3, reversing
changes made to 4cd2a4ae3a0722816371766b31515432ec7ada6c.

It also reverts commits 9b8212d25, fb3f1fb5c and 2fdfa96ee.
---
 changelog.d/7289.bugfix                |   1 -
 synapse/federation/transport/client.py |  14 +--
 synapse/handlers/e2e_keys.py           | 138 ++-----------------------
 3 files changed, 12 insertions(+), 141 deletions(-)
 delete mode 100644 changelog.d/7289.bugfix

diff --git a/changelog.d/7289.bugfix b/changelog.d/7289.bugfix
deleted file mode 100644
index 1568e1569b3..00000000000
--- a/changelog.d/7289.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix a bug with cross-signing devices of users on other homeservers while in worker mode.
\ No newline at end of file
diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py
index c35637a571d..dc563538deb 100644
--- a/synapse/federation/transport/client.py
+++ b/synapse/federation/transport/client.py
@@ -406,19 +406,13 @@ def query_client_keys(self, destination, query_content, timeout):
               "device_keys": {
                 "<user_id>": {
                   "<device_id>": {...}
-              } }
-              "master_keys": {
-                "<user_id>": {...}
-              } }
-              "self_signing_keys": {
-                "<user_id>": {...}
             } } }
 
         Args:
             destination(str): The server to query.
             query_content(dict): The user ids to query.
         Returns:
-            A dict containing device and cross-signing keys.
+            A dict containg the device keys.
         """
         path = _create_v1_path("/user/keys/query")
 
@@ -435,16 +429,14 @@ def query_user_devices(self, destination, user_id, timeout):
         Response:
             {
               "stream_id": "...",
-              "devices": [ { ... } ],
-              "master_key": { ... },
-              "self_signing_key: { ... }
+              "devices": [ { ... } ]
             }
 
         Args:
             destination(str): The server to query.
             query_content(dict): The user ids to query.
         Returns:
-            A dict containing device and cross-signing keys.
+            A dict containg the device keys.
         """
         path = _create_v1_path("/user/devices/%s", user_id)
 
diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py
index f096ea8a539..8d7075f2eb2 100644
--- a/synapse/handlers/e2e_keys.py
+++ b/synapse/handlers/e2e_keys.py
@@ -174,8 +174,8 @@ def do_remote_query(destination):
             """This is called when we are querying the device list of a user on
             a remote homeserver and their device list is not in the device list
             cache. If we share a room with this user and we're not querying for
-            specific user we will update the cache with their device list.
-            """
+            specific user we will update the cache
+            with their device list."""
 
             destination_query = remote_queries_not_in_cache[destination]
 
@@ -961,19 +961,13 @@ def _process_other_signatures(self, user_id, signatures):
         return signature_list, failures
 
     @defer.inlineCallbacks
-    def _get_e2e_cross_signing_verify_key(
-        self, user_id: str, key_type: str, from_user_id: str = None
-    ):
-        """Fetch locally or remotely query for a cross-signing public key.
-
-        First, attempt to fetch the cross-signing public key from storage.
-        If that fails, query the keys from the homeserver they belong to
-        and update our local copy.
+    def _get_e2e_cross_signing_verify_key(self, user_id, key_type, from_user_id=None):
+        """Fetch the cross-signing public key from storage and interpret it.
 
         Args:
-            user_id: the user whose key should be fetched
-            key_type: the type of key to fetch
-            from_user_id: the user that we are fetching the keys for.
+            user_id (str): the user whose key should be fetched
+            key_type (str): the type of key to fetch
+            from_user_id (str): the user that we are fetching the keys for.
                 This affects what signatures are fetched.
 
         Returns:
@@ -982,130 +976,16 @@ def _get_e2e_cross_signing_verify_key(
 
         Raises:
             NotFoundError: if the key is not found
-            SynapseError: if `user_id` is invalid
         """
-        user = UserID.from_string(user_id)
-
-        key_id = None
-        verify_key = None
         key = yield self.store.get_e2e_cross_signing_key(
             user_id, key_type, from_user_id
         )
-        if key is not None:
-            try:
-                key_id, verify_key = get_verify_key_from_cross_signing_key(key)
-            except ValueError as e:
-                logger.warning(
-                    "Invalid %s key retrieved: %s - %s %s", key_type, key, type(e), e,
-                )
-                raise SynapseError(
-                    502, "Invalid %s key retrieved from database" % (key_type,)
-                )
-
-        # If we couldn't find the key locally, and we're looking for keys of
-        # another user then attempt to fetch the missing key from the remote
-        # user's server.
-        #
-        # We may run into this in possible edge cases where a user tries to
-        # cross-sign a remote user, but does not share any rooms with them yet.
-        # Thus, we would not have their key list yet. We fetch the key here,
-        # store it and notify clients of new, associated device IDs.
-        if (
-            key is None
-            and not self.is_mine(user)
-            # We only get "master" and "self_signing" keys from remote servers
-            and key_type in ["master", "self_signing"]
-        ):
-            (
-                key,
-                key_id,
-                verify_key,
-            ) = yield self._retrieve_cross_signing_keys_for_remote_user(user, key_type)
-
         if key is None:
-            logger.debug("No %s key found for %s", key_type, user_id)
+            logger.debug("no %s key found for %s", key_type, user_id)
             raise NotFoundError("No %s key found for %s" % (key_type, user_id))
-
+        key_id, verify_key = get_verify_key_from_cross_signing_key(key)
         return key, key_id, verify_key
 
-    @defer.inlineCallbacks
-    def _retrieve_cross_signing_keys_for_remote_user(
-        self, user: UserID, desired_key_type: str,
-    ):
-        """Queries cross-signing keys for a remote user and saves them to the database
-
-        Only the key specified by `key_type` will be returned, while all retrieved keys
-        will be saved regardless
-
-        Args:
-            user: The user to query remote keys for
-            desired_key_type: The type of key to receive. One of "master", "self_signing"
-
-        Returns:
-            Deferred[Tuple[Optional[Dict], Optional[str], Optional[VerifyKey]]]: A tuple
-            of the retrieved key content, the key's ID and the matching VerifyKey.
-            If the key cannot be retrieved, all values in the tuple will instead be None.
-        """
-        try:
-            remote_result = yield self.federation.query_user_devices(
-                user.domain, user.to_string()
-            )
-        except Exception as e:
-            logger.warning(
-                "Unable to query %s for cross-signing keys of user %s: %s %s",
-                user.domain,
-                user.to_string(),
-                type(e),
-                e,
-            )
-            return None, None, None
-
-        # Process each of the retrieved cross-signing keys
-        final_key = None
-        final_key_id = None
-        final_verify_key = None
-        device_ids = []
-        for key_type in ["master", "self_signing"]:
-            key_content = remote_result.get(key_type + "_key")
-            if not key_content:
-                continue
-
-            # At the same time, store this key in the db for
-            # subsequent queries
-            yield self.store.set_e2e_cross_signing_key(
-                user.to_string(), key_type, key_content
-            )
-
-            # Note down the device ID attached to this key
-            try:
-                # verify_key is a VerifyKey from signedjson, which uses
-                # .version to denote the portion of the key ID after the
-                # algorithm and colon, which is the device ID
-                key_id, verify_key = get_verify_key_from_cross_signing_key(key_content)
-            except ValueError as e:
-                logger.warning(
-                    "Invalid %s key retrieved from remote %s: %s - %s %s",
-                    user.domain,
-                    key_type,
-                    key_content,
-                    type(e),
-                    e,
-                )
-                continue
-            device_ids.append(verify_key.version)
-
-            # If this is the desired key type, save it and its ID/VerifyKey
-            if key_type == desired_key_type:
-                final_key = key_content
-                final_verify_key = verify_key
-                final_key_id = key_id
-
-        # Notify clients that new devices for this user have been discovered
-        if device_ids:
-            yield self.device_handler.notify_device_update(user.to_string(), device_ids)
-
-        return final_key, final_key_id, final_verify_key
-
 
 def _check_cross_signing_key(key, user_id, key_type, signing_key=None):
     """Check a cross-signing key uploaded by a user.  Performs some basic sanity

From 5debf3071cc04896b2ab6169dcfd9ea2586200c7 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 4 May 2020 16:44:21 +0100
Subject: [PATCH 105/278] Fix redis password support

---
 synapse/replication/tcp/redis.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/synapse/replication/tcp/redis.py b/synapse/replication/tcp/redis.py
index 617e860f95d..41c623d7373 100644
--- a/synapse/replication/tcp/redis.py
+++ b/synapse/replication/tcp/redis.py
@@ -61,6 +61,7 @@ class RedisSubscriber(txredisapi.SubscriberProtocol, AbstractConnection):
     outbound_redis_connection = None  # type: txredisapi.RedisProtocol
 
     def connectionMade(self):
+        super().connectionMade()
         logger.info("Connected to redis instance")
         self.subscribe(self.stream_name)
         self.send_command(ReplicateCommand())
@@ -119,6 +120,7 @@ async def handle_command(self, cmd: Command):
             logger.warning("Unhandled command: %r", cmd)
 
     def connectionLost(self, reason):
+        super().connectionLost(reason)
         logger.info("Lost connection to redis instance")
         self.handler.lost_connection(self)
 
@@ -189,5 +191,6 @@ def buildProtocol(self, addr):
         p.handler = self.handler
         p.outbound_redis_connection = self.outbound_redis_connection
         p.stream_name = self.stream_name
+        p.password = self.password
 
         return p

From 323cfe3efb433d4d34d3fe428935692b55984145 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Wed, 6 May 2020 12:14:01 +0100
Subject: [PATCH 106/278] fix bad merge

---
 synapse/handlers/room_member.py | 22 +++-------------------
 1 file changed, 3 insertions(+), 19 deletions(-)

diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index cdae8e57bc9..79d69f9b3b0 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -268,28 +268,27 @@ async def update_membership(
     ):
         key = (room_id,)
 
-<<<<<<< HEAD
         as_id = object()
         if requester.app_service:
             as_id = requester.app_service.id
 
         then = self.clock.time_msec()
 
-        with (yield self.member_limiter.queue(as_id)):
+        with (await self.member_limiter.queue(as_id)):
             diff = self.clock.time_msec() - then
 
             if diff > 80 * 1000:
                 # haproxy would have timed the request out anyway...
                 raise SynapseError(504, "took to long to process")
 
-            with (yield self.member_linearizer.queue(key)):
+            with (await self.member_linearizer.queue(key)):
                 diff = self.clock.time_msec() - then
 
                 if diff > 80 * 1000:
                     # haproxy would have timed the request out anyway...
                     raise SynapseError(504, "took to long to process")
 
-                result = yield self._update_membership(
+                result = await self._update_membership(
                     requester,
                     target,
                     room_id,
@@ -301,21 +300,6 @@ async def update_membership(
                     content=content,
                     require_consent=require_consent,
                 )
-=======
-        with (await self.member_linearizer.queue(key)):
-            result = await self._update_membership(
-                requester,
-                target,
-                room_id,
-                action,
-                txn_id=txn_id,
-                remote_room_hosts=remote_room_hosts,
-                third_party_signed=third_party_signed,
-                ratelimit=ratelimit,
-                content=content,
-                require_consent=require_consent,
-            )
->>>>>>> release-v1.13.0
 
         return result
 

From 4d3ebc36203ffaf2079024335d4bb285f1c6a00e Mon Sep 17 00:00:00 2001
From: Patrick Cloke <patrickc@matrix.org>
Date: Thu, 7 May 2020 08:34:12 -0400
Subject: [PATCH 107/278] Disable validation that a UI authentication session
 has not been modified during a request cycle.

Partial backout of 1c1242acba9694a3a4b1eb3b14ec0bac11ee4ff8 (#7068)
---
 synapse/handlers/auth.py | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index 7613e5b6ab3..a167498add8 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -329,18 +329,6 @@ async def check_auth(
                 # isn't arbitrary.
                 clientdict = session.clientdict
 
-            # Ensure that the queried operation does not vary between stages of
-            # the UI authentication session. This is done by generating a stable
-            # comparator based on the URI, method, and body (minus the auth dict)
-            # and storing it during the initial query. Subsequent queries ensure
-            # that this comparator has not changed.
-            comparator = (uri, method, clientdict)
-            if (session.uri, session.method, session.clientdict) != comparator:
-                raise SynapseError(
-                    403,
-                    "Requested operation has changed during the UI authentication session.",
-                )
-
         if not authdict:
             raise InteractiveAuthIncompleteError(
                 self._auth_dict_for_flows(flows, session.session_id)

From c4c84b67d58487247c8591caa4af2f459db78d7c Mon Sep 17 00:00:00 2001
From: Patrick Cloke <patrickc@matrix.org>
Date: Thu, 7 May 2020 10:03:57 -0400
Subject: [PATCH 108/278] Disable a failing test.

---
 tests/rest/client/v2_alpha/test_auth.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/rest/client/v2_alpha/test_auth.py b/tests/rest/client/v2_alpha/test_auth.py
index 587be7b2e71..efc20f86aa8 100644
--- a/tests/rest/client/v2_alpha/test_auth.py
+++ b/tests/rest/client/v2_alpha/test_auth.py
@@ -182,6 +182,9 @@ def test_cannot_change_operation(self):
         self.render(request)
         self.assertEqual(channel.code, 403)
 
+    # This behavior is currently disabled.
+    test_cannot_change_operation.skip = True
+
     def test_complete_operation_unknown_session(self):
         """
         Attempting to mark an invalid session as complete should error.

From d7c7f64f176ab73cfdcc1a26d71cfbffc49c506c Mon Sep 17 00:00:00 2001
From: Patrick Cloke <patrickc@matrix.org>
Date: Thu, 7 May 2020 10:07:09 -0400
Subject: [PATCH 109/278] Propagate changes to the client dict to the database.

---
 synapse/handlers/auth.py                    |  4 +++-
 synapse/storage/data_stores/main/ui_auth.py | 21 +++++++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index a167498add8..1d779d29782 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -317,7 +317,7 @@ async def check_auth(
             except StoreError:
                 raise SynapseError(400, "Unknown session ID: %s" % (sid,))
 
-            if not clientdict:
+            if clientdict:
                 # This was designed to allow the client to omit the parameters
                 # and just supply the session in subsequent calls so it split
                 # auth between devices by just sharing the session, (eg. so you
@@ -327,6 +327,8 @@ async def check_auth(
                 # on a homeserver.
                 # Revisit: Assuming the REST APIs do sensible validation, the data
                 # isn't arbitrary.
+                await self.store.set_ui_auth_clientdict(sid, clientdict)
+            else:
                 clientdict = session.clientdict
 
         if not authdict:
diff --git a/synapse/storage/data_stores/main/ui_auth.py b/synapse/storage/data_stores/main/ui_auth.py
index c8eebc93784..1d8ee22fb11 100644
--- a/synapse/storage/data_stores/main/ui_auth.py
+++ b/synapse/storage/data_stores/main/ui_auth.py
@@ -172,6 +172,27 @@ async def get_completed_ui_auth_stages(
 
         return results
 
+    async def set_ui_auth_clientdict(
+        self, session_id: str, clientdict: JsonDict
+    ) -> None:
+        """
+        Store an updated clientdict for a given session ID.
+
+        Args:
+            session_id: The ID of this session as returned from check_auth
+            clientdict:
+                The dictionary from the client root level, not the 'auth' key.
+        """
+        # The clientdict gets stored as JSON.
+        clientdict_json = json.dumps(clientdict)
+
+        self.db.simple_update_one(
+            table="ui_auth_sessions",
+            keyvalues={"session_id": session_id},
+            updatevalues={"clientdict": clientdict_json},
+            desc="set_ui_auth_client_dict",
+        )
+
     async def set_ui_auth_session_data(self, session_id: str, key: str, value: Any):
         """
         Store a key-value pair into the sessions data associated with this

From 6610343332bd16cfebe94fb93fe7b55c7362e28a Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Mon, 11 May 2020 13:08:14 +0100
Subject: [PATCH 110/278] Revert emergency registration patches

Revert "Merge remote-tracking branch 'origin/clokep/no-validate-ui-auth-sess' into matrix-org-hotfixes"

This reverts commit 5adad58d959ed0b249d43a9df81f034edc8876e7, reversing
changes made to 617541c4c6f9dea1ac1ed0a8f1ab848507457e23.
---
 synapse/handlers/auth.py                    |  4 +---
 synapse/storage/data_stores/main/ui_auth.py | 21 ---------------------
 tests/rest/client/v2_alpha/test_auth.py     |  3 ---
 3 files changed, 1 insertion(+), 27 deletions(-)

diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index 1d779d29782..a167498add8 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -317,7 +317,7 @@ async def check_auth(
             except StoreError:
                 raise SynapseError(400, "Unknown session ID: %s" % (sid,))
 
-            if clientdict:
+            if not clientdict:
                 # This was designed to allow the client to omit the parameters
                 # and just supply the session in subsequent calls so it split
                 # auth between devices by just sharing the session, (eg. so you
@@ -327,8 +327,6 @@ async def check_auth(
                 # on a homeserver.
                 # Revisit: Assuming the REST APIs do sensible validation, the data
                 # isn't arbitrary.
-                await self.store.set_ui_auth_clientdict(sid, clientdict)
-            else:
                 clientdict = session.clientdict
 
         if not authdict:
diff --git a/synapse/storage/data_stores/main/ui_auth.py b/synapse/storage/data_stores/main/ui_auth.py
index 1d8ee22fb11..c8eebc93784 100644
--- a/synapse/storage/data_stores/main/ui_auth.py
+++ b/synapse/storage/data_stores/main/ui_auth.py
@@ -172,27 +172,6 @@ async def get_completed_ui_auth_stages(
 
         return results
 
-    async def set_ui_auth_clientdict(
-        self, session_id: str, clientdict: JsonDict
-    ) -> None:
-        """
-        Store an updated clientdict for a given session ID.
-
-        Args:
-            session_id: The ID of this session as returned from check_auth
-            clientdict:
-                The dictionary from the client root level, not the 'auth' key.
-        """
-        # The clientdict gets stored as JSON.
-        clientdict_json = json.dumps(clientdict)
-
-        self.db.simple_update_one(
-            table="ui_auth_sessions",
-            keyvalues={"session_id": session_id},
-            updatevalues={"clientdict": clientdict_json},
-            desc="set_ui_auth_client_dict",
-        )
-
     async def set_ui_auth_session_data(self, session_id: str, key: str, value: Any):
         """
         Store a key-value pair into the sessions data associated with this
diff --git a/tests/rest/client/v2_alpha/test_auth.py b/tests/rest/client/v2_alpha/test_auth.py
index efc20f86aa8..587be7b2e71 100644
--- a/tests/rest/client/v2_alpha/test_auth.py
+++ b/tests/rest/client/v2_alpha/test_auth.py
@@ -182,9 +182,6 @@ def test_cannot_change_operation(self):
         self.render(request)
         self.assertEqual(channel.code, 403)
 
-    # This behavior is currently disabled.
-    test_cannot_change_operation.skip = True
-
     def test_complete_operation_unknown_session(self):
         """
         Attempting to mark an invalid session as complete should error.

From 7ff7a415d17128fa50dd05b8b033be3ee811770c Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Mon, 11 May 2020 13:08:48 +0100
Subject: [PATCH 111/278] Revert emergency registration patches

Revert "Merge commit '4d3ebc' into matrix-org-hotfixes"

This reverts commit 617541c4c6f9dea1ac1ed0a8f1ab848507457e23, reversing
changes made to ae4f6140f134b8a9296b35ff15b37641912c76ec.
---
 synapse/handlers/auth.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index a167498add8..7613e5b6ab3 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -329,6 +329,18 @@ async def check_auth(
                 # isn't arbitrary.
                 clientdict = session.clientdict
 
+            # Ensure that the queried operation does not vary between stages of
+            # the UI authentication session. This is done by generating a stable
+            # comparator based on the URI, method, and body (minus the auth dict)
+            # and storing it during the initial query. Subsequent queries ensure
+            # that this comparator has not changed.
+            comparator = (uri, method, clientdict)
+            if (session.uri, session.method, session.clientdict) != comparator:
+                raise SynapseError(
+                    403,
+                    "Requested operation has changed during the UI authentication session.",
+                )
+
         if not authdict:
             raise InteractiveAuthIncompleteError(
                 self._auth_dict_for_flows(flows, session.session_id)

From 84639b32ae9f99de0405186d6568fd510caff21e Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Wed, 20 May 2020 22:11:30 +0100
Subject: [PATCH 112/278] stub out GET presence requests

---
 synapse/app/generic_worker.py | 19 +++----------------
 1 file changed, 3 insertions(+), 16 deletions(-)

diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index 667ad204289..9d9849365a7 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -140,31 +140,18 @@
 
 class PresenceStatusStubServlet(RestServlet):
     """If presence is disabled this servlet can be used to stub out setting
-    presence status, while proxying the getters to the master instance.
+    presence status.
     """
 
     PATTERNS = client_patterns("/presence/(?P<user_id>[^/]*)/status")
 
     def __init__(self, hs):
         super(PresenceStatusStubServlet, self).__init__()
-        self.http_client = hs.get_simple_http_client()
         self.auth = hs.get_auth()
-        self.main_uri = hs.config.worker_main_http_uri
 
     async def on_GET(self, request, user_id):
-        # Pass through the auth headers, if any, in case the access token
-        # is there.
-        auth_headers = request.requestHeaders.getRawHeaders("Authorization", [])
-        headers = {"Authorization": auth_headers}
-
-        try:
-            result = await self.http_client.get_json(
-                self.main_uri + request.uri.decode("ascii"), headers=headers
-            )
-        except HttpResponseException as e:
-            raise e.to_synapse_error()
-
-        return 200, result
+        await self.auth.get_user_by_req(request)
+        return 200, {"state": "offline"}
 
     async def on_PUT(self, request, user_id):
         await self.auth.get_user_by_req(request)

From b3a9ad124c703b51238a33ed33aaa0f95041df7c Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Wed, 20 May 2020 22:17:39 +0100
Subject: [PATCH 113/278] Fix field name in stubbed out presence servlet

---
 synapse/app/generic_worker.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index 9d9849365a7..d78c79e0f92 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -27,7 +27,7 @@
 import synapse
 import synapse.events
 from synapse.api.constants import EventTypes
-from synapse.api.errors import HttpResponseException, SynapseError
+from synapse.api.errors import SynapseError
 from synapse.api.urls import (
     CLIENT_API_PREFIX,
     FEDERATION_PREFIX,
@@ -151,7 +151,7 @@ def __init__(self, hs):
 
     async def on_GET(self, request, user_id):
         await self.auth.get_user_by_req(request)
-        return 200, {"state": "offline"}
+        return 200, {"presence": "offline"}
 
     async def on_PUT(self, request, user_id):
         await self.auth.get_user_by_req(request)

From 1c347c84bf2287b364932c62e75b853e7af996e3 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Wed, 20 May 2020 23:33:13 +0100
Subject: [PATCH 114/278] inline some config references

---
 synapse/handlers/message.py                   |  8 +++++---
 .../resource_limits_server_notices.py         | 15 +++++++--------
 .../data_stores/main/monthly_active_users.py  | 19 +++++++++++--------
 3 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index a622a600b48..66b46bd59f3 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -403,8 +403,10 @@ def __init__(self, hs):
         if self._block_events_without_consent_error:
             self._consent_uri_builder = ConsentURIBuilder(self.config)
 
+        self._is_worker_app = self.config.worker_app is not None
+
         if (
-            not self.config.worker_app
+            not self._is_worker_app
             and self.config.cleanup_extremities_with_dummy_events
         ):
             self.clock.looping_call(
@@ -824,7 +826,7 @@ async def handle_new_client_event(
         success = False
         try:
             # If we're a worker we need to hit out to the master.
-            if self.config.worker_app:
+            if self._is_worker_app:
                 await self.send_event_to_master(
                     event_id=event.event_id,
                     store=self.store,
@@ -890,7 +892,7 @@ async def persist_and_notify_client_event(
 
         This should only be run on master.
         """
-        assert not self.config.worker_app
+        assert not self._is_worker_app
 
         if ratelimit:
             # We check if this is a room admin redacting an event so that we
diff --git a/synapse/server_notices/resource_limits_server_notices.py b/synapse/server_notices/resource_limits_server_notices.py
index d97166351e8..73f2cedb5cf 100644
--- a/synapse/server_notices/resource_limits_server_notices.py
+++ b/synapse/server_notices/resource_limits_server_notices.py
@@ -48,6 +48,12 @@ def __init__(self, hs):
 
         self._notifier = hs.get_notifier()
 
+        self._enabled = (
+            hs.config.limit_usage_by_mau
+            and self._server_notices_manager.is_enabled()
+            and not hs.config.hs_disabled
+        )
+
     async def maybe_send_server_notice_to_user(self, user_id):
         """Check if we need to send a notice to this user, this will be true in
         two cases.
@@ -61,14 +67,7 @@ async def maybe_send_server_notice_to_user(self, user_id):
         Returns:
             Deferred
         """
-        if self._config.hs_disabled is True:
-            return
-
-        if self._config.limit_usage_by_mau is False:
-            return
-
-        if not self._server_notices_manager.is_enabled():
-            # Don't try and send server notices unless they've been enabled
+        if not self._enabled:
             return
 
         timestamp = await self._store.user_last_seen_monthly_active(user_id)
diff --git a/synapse/storage/data_stores/main/monthly_active_users.py b/synapse/storage/data_stores/main/monthly_active_users.py
index 925bc5691b2..a624d1f1b6a 100644
--- a/synapse/storage/data_stores/main/monthly_active_users.py
+++ b/synapse/storage/data_stores/main/monthly_active_users.py
@@ -122,6 +122,10 @@ class MonthlyActiveUsersStore(MonthlyActiveUsersWorkerStore):
     def __init__(self, database: Database, db_conn, hs):
         super(MonthlyActiveUsersStore, self).__init__(database, db_conn, hs)
 
+        self._limit_usage_by_mau = hs.config.limit_usage_by_mau
+        self._mau_stats_only = hs.config.mau_stats_only
+        self._max_mau_value = hs.config.max_mau_value
+
         # Do not add more reserved users than the total allowable number
         # cur = LoggingTransaction(
         self.db.new_transaction(
@@ -130,7 +134,7 @@ def __init__(self, database: Database, db_conn, hs):
             [],
             [],
             self._initialise_reserved_users,
-            hs.config.mau_limits_reserved_threepids[: self.hs.config.max_mau_value],
+            hs.config.mau_limits_reserved_threepids[: self._max_mau_value],
         )
 
     def _initialise_reserved_users(self, txn, threepids):
@@ -191,8 +195,7 @@ def _reap_users(txn, reserved_users):
 
             txn.execute(sql, query_args)
 
-            max_mau_value = self.hs.config.max_mau_value
-            if self.hs.config.limit_usage_by_mau:
+            if self._limit_usage_by_mau:
                 # If MAU user count still exceeds the MAU threshold, then delete on
                 # a least recently active basis.
                 # Note it is not possible to write this query using OFFSET due to
@@ -210,13 +213,13 @@ def _reap_users(txn, reserved_users):
                             LIMIT ?
                         )
                         """
-                    txn.execute(sql, (max_mau_value,))
+                    txn.execute(sql, ((self._max_mau_value),))
                 # Need if/else since 'AND user_id NOT IN ({})' fails on Postgres
                 # when len(reserved_users) == 0. Works fine on sqlite.
                 else:
                     # Must be >= 0 for postgres
                     num_of_non_reserved_users_to_remove = max(
-                        max_mau_value - len(reserved_users), 0
+                        self._max_mau_value - len(reserved_users), 0
                     )
 
                     # It is important to filter reserved users twice to guard
@@ -335,7 +338,7 @@ def populate_monthly_active_users(self, user_id):
         Args:
             user_id(str): the user_id to query
         """
-        if self.hs.config.limit_usage_by_mau or self.hs.config.mau_stats_only:
+        if self._limit_usage_by_mau or self._mau_stats_only:
             # Trial users and guests should not be included as part of MAU group
             is_guest = yield self.is_guest(user_id)
             if is_guest:
@@ -356,11 +359,11 @@ def populate_monthly_active_users(self, user_id):
                 # In the case where mau_stats_only is True and limit_usage_by_mau is
                 # False, there is no point in checking get_monthly_active_count - it
                 # adds no value and will break the logic if max_mau_value is exceeded.
-                if not self.hs.config.limit_usage_by_mau:
+                if not self._limit_usage_by_mau:
                     yield self.upsert_monthly_active_user(user_id)
                 else:
                     count = yield self.get_monthly_active_count()
-                    if count < self.hs.config.max_mau_value:
+                    if count < self._max_mau_value:
                         yield self.upsert_monthly_active_user(user_id)
             elif now - last_seen_timestamp > LAST_SEEN_GRANULARITY:
                 yield self.upsert_monthly_active_user(user_id)

From 104c4902742461bfcbb8be645f4af782680b8a6e Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Fri, 18 Sep 2020 14:51:11 +0100
Subject: [PATCH 115/278] Use _check_sigs_and_hash_and_fetch to validate
 backfill requests (#8350)

This is a bit of a hack, as `_check_sigs_and_hash_and_fetch` is intended
to pull an event from the database, or (re-)pull it from the server that
originally sent it, if checking the event's signature fails.

During backfill we *know* that we won't have the event in our database;
however, it is still useful to be able to query the original sending
server, as the server we're backfilling from may be acting maliciously.

The main benefit of, and reason for, this change is that
`_check_sigs_and_hash_and_fetch` will drop an event during backfill if
it cannot be successfully validated, whereas the current code simply
fails the whole backfill request - resulting in the client's /messages
request being silently dropped.

This is a quick patch to fix backfilling rooms that contain malformed
events. A better implementation is planned for the future.
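
For illustration only, a minimal standalone sketch of the behavioural difference described above (the names here are hypothetical stand-ins, not the Synapse APIs): the old approach fails the whole batch on one bad event, while the new approach keeps whatever validates.

```python
# Hypothetical sketch of the behaviour change described above; the real logic
# lives in synapse/federation/federation_client.py.
from typing import Callable, Dict, List


def validate_all_or_fail(
    events: List[Dict], check_event: Callable[[Dict], bool]
) -> List[Dict]:
    """Old behaviour: a single bad event fails the whole backfill response."""
    for event in events:
        if not check_event(event):
            raise ValueError("signature/hash check failed for %r" % event.get("event_id"))
    return events


def validate_and_drop(
    events: List[Dict], check_event: Callable[[Dict], bool]
) -> List[Dict]:
    """New behaviour: drop events that fail checks and keep the rest."""
    return [event for event in events if check_event(event)]


events = [{"event_id": "$good", "valid": True}, {"event_id": "$bad", "valid": False}]
print(validate_and_drop(events, lambda e: e["valid"]))  # only "$good" survives
```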
---
 changelog.d/8350.bugfix                 | 1 +
 synapse/federation/federation_client.py | 8 +++-----
 2 files changed, 4 insertions(+), 5 deletions(-)
 create mode 100644 changelog.d/8350.bugfix

diff --git a/changelog.d/8350.bugfix b/changelog.d/8350.bugfix
new file mode 100644
index 00000000000..0e493c02821
--- /dev/null
+++ b/changelog.d/8350.bugfix
@@ -0,0 +1 @@
+Partially mitigate bug where newly joined servers couldn't get past events in a room when there is a malformed event.
\ No newline at end of file
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index a2e8d96ea27..d42930d1b94 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -217,11 +217,9 @@ async def backfill(
             for p in transaction_data["pdus"]
         ]
 
-        # FIXME: We should handle signature failures more gracefully.
-        pdus[:] = await make_deferred_yieldable(
-            defer.gatherResults(
-                self._check_sigs_and_hashes(room_version, pdus), consumeErrors=True,
-            ).addErrback(unwrapFirstError)
+        # Check signatures and hash of pdus, removing any from the list that fail checks
+        pdus[:] = await self._check_sigs_and_hash_and_fetch(
+            dest, pdus, outlier=True, room_version=room_version
         )
 
         return pdus

From 9eea5c43afb0e7e3fde47576be0ce931f17559f7 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 18 Sep 2020 14:25:52 +0100
Subject: [PATCH 116/278] Intelligently select extremities used in backfill.
 (#8349)

Instead of just using the most recent extremities, let's pick the
ones that will give us results that the pagination request cares about,
i.e. pick extremities only if they have a smaller depth than the
pagination token.

This is useful when we fail to backfill an extremity, as we no longer
get stuck requesting that same extremity repeatedly.
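
As a rough standalone sketch of the heuristic described above (a hypothetical helper, not the handler code itself): extremities with a greater depth than the pagination position are dropped, and backfill is only attempted once the request gets close to the nearest remaining extremity.

```python
# Hypothetical sketch of the extremity-selection heuristic; the real logic is
# in FederationHandler.maybe_backfill.
from typing import Dict, List, Tuple


def pick_backfill_extremities(
    extremities: Dict[str, int], current_depth: int, limit: int, max_extremities: int = 5
) -> List[Tuple[str, int]]:
    if not extremities:
        return []

    # Sort by depth, descending; max_depth is the extremity closest to where
    # we are currently paginating from.
    sorted_extrems = sorted(extremities.items(), key=lambda e: -e[1])
    max_depth = sorted_extrems[0][1]

    # No-op unless the pagination request is approaching an extremity.
    if current_depth - 2 * limit > max_depth:
        return []

    # Ignore extremities with a greater depth than the pagination position, so
    # we don't keep retrying an extremity we have already failed to backfill.
    filtered = [e for e in sorted_extrems if e[1] <= current_depth]
    if filtered:
        sorted_extrems = filtered

    # Don't request too many extremities, to keep the backfill URI short.
    return sorted_extrems[:max_extremities]


print(pick_backfill_extremities({"$a": 120, "$b": 90}, current_depth=100, limit=10))
# -> [('$b', 90)]: "$a" lies past our pagination position and so is skipped.
```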
---
 changelog.d/8349.bugfix                  |  1 +
 synapse/handlers/federation.py           | 65 +++++++++++++++++++++---
 synapse/handlers/pagination.py           |  8 +--
 synapse/storage/databases/main/stream.py | 13 ++---
 4 files changed, 67 insertions(+), 20 deletions(-)
 create mode 100644 changelog.d/8349.bugfix

diff --git a/changelog.d/8349.bugfix b/changelog.d/8349.bugfix
new file mode 100644
index 00000000000..cf2f531b148
--- /dev/null
+++ b/changelog.d/8349.bugfix
@@ -0,0 +1 @@
+Fix a longstanding bug where back pagination over federation could get stuck if it failed to handle a received event.
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 43f2986f895..014dab2940c 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -943,15 +943,26 @@ async def backfill(self, dest, room_id, limit, extremities):
 
         return events
 
-    async def maybe_backfill(self, room_id, current_depth):
+    async def maybe_backfill(
+        self, room_id: str, current_depth: int, limit: int
+    ) -> bool:
         """Checks the database to see if we should backfill before paginating,
         and if so do.
+
+        Args:
+            room_id
+            current_depth: The depth from which we're paginating. This is
+                used to decide if we should backfill and what extremities to
+                use.
+            limit: The number of events that the pagination request will
+                return. This is used as part of the heuristic to decide if we
+                should back paginate.
         """
         extremities = await self.store.get_oldest_events_with_depth_in_room(room_id)
 
         if not extremities:
             logger.debug("Not backfilling as no extremeties found.")
-            return
+            return False
 
         # We only want to paginate if we can actually see the events we'll get,
         # as otherwise we'll just spend a lot of resources to get redacted
@@ -1004,16 +1015,54 @@ async def maybe_backfill(self, room_id, current_depth):
         sorted_extremeties_tuple = sorted(extremities.items(), key=lambda e: -int(e[1]))
         max_depth = sorted_extremeties_tuple[0][1]
 
+        # If we're approaching an extremity we trigger a backfill, otherwise we
+        # no-op.
+        #
+        # We chose twice the limit here as then clients paginating backwards
+        # will send pagination requests that trigger backfill at least twice
+        # using the most recent extremity before it gets removed (see below). We
+        # chose more than one multiple of the limit in case of failure, but choosing a
+        # much larger factor will result in triggering a backfill request much
+        # earlier than necessary.
+        if current_depth - 2 * limit > max_depth:
+            logger.debug(
+                "Not backfilling as we don't need to. %d < %d - 2 * %d",
+                max_depth,
+                current_depth,
+                limit,
+            )
+            return False
+
+        logger.debug(
+            "room_id: %s, backfill: current_depth: %s, max_depth: %s, extrems: %s",
+            room_id,
+            current_depth,
+            max_depth,
+            sorted_extremeties_tuple,
+        )
+
+        # We ignore extremities that have a greater depth than our current depth
+        # as:
+        #    1. we don't really care about getting events that have happened
+        #       before our current position; and
+        #    2. we have likely previously tried and failed to backfill from that
+        #       extremity, so to avoid getting "stuck" requesting the same
+        #       backfill repeatedly we drop those extremities.
+        filtered_sorted_extremeties_tuple = [
+            t for t in sorted_extremeties_tuple if int(t[1]) <= current_depth
+        ]
+
+        # However, we need to check that the filtered extremities are non-empty.
+        # If they are empty then we can either a) bail or b) still attempt to
+        # backfill. We opt to try backfilling anyway just in case we do get
+        # relevant events.
+        if filtered_sorted_extremeties_tuple:
+            sorted_extremeties_tuple = filtered_sorted_extremeties_tuple
+
         # We don't want to specify too many extremities as it causes the backfill
         # request URI to be too long.
         extremities = dict(sorted_extremeties_tuple[:5])
 
-        if current_depth > max_depth:
-            logger.debug(
-                "Not backfilling as we don't need to. %d < %d", max_depth, current_depth
-            )
-            return
-
         # Now we need to decide which hosts to hit first.
 
         # First we try hosts that are already in the room
diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py
index 34ed0e29215..6067585f9bb 100644
--- a/synapse/handlers/pagination.py
+++ b/synapse/handlers/pagination.py
@@ -362,9 +362,9 @@ async def get_messages(
                 # if we're going backwards, we might need to backfill. This
                 # requires that we have a topo token.
                 if room_token.topological:
-                    max_topo = room_token.topological
+                    curr_topo = room_token.topological
                 else:
-                    max_topo = await self.store.get_max_topological_token(
+                    curr_topo = await self.store.get_current_topological_token(
                         room_id, room_token.stream
                     )
 
@@ -380,11 +380,11 @@ async def get_messages(
                     leave_token = await self.store.get_topological_token_for_event(
                         member_event_id
                     )
-                    if RoomStreamToken.parse(leave_token).topological < max_topo:
+                    if RoomStreamToken.parse(leave_token).topological < curr_topo:
                         source_config.from_key = str(leave_token)
 
                 await self.hs.get_handlers().federation_handler.maybe_backfill(
-                    room_id, max_topo
+                    room_id, curr_topo, limit=source_config.limit,
                 )
 
             events, next_key = await self.store.paginate_room_events(
diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py
index be6df8a6d1d..db20a3db30f 100644
--- a/synapse/storage/databases/main/stream.py
+++ b/synapse/storage/databases/main/stream.py
@@ -648,23 +648,20 @@ async def get_topological_token_for_event(self, event_id: str) -> str:
         )
         return "t%d-%d" % (row["topological_ordering"], row["stream_ordering"])
 
-    async def get_max_topological_token(self, room_id: str, stream_key: int) -> int:
-        """Get the max topological token in a room before the given stream
+    async def get_current_topological_token(self, room_id: str, stream_key: int) -> int:
+        """Gets the topological token in a room after or at the given stream
         ordering.
 
         Args:
             room_id
             stream_key
-
-        Returns:
-            The maximum topological token.
         """
         sql = (
-            "SELECT coalesce(max(topological_ordering), 0) FROM events"
-            " WHERE room_id = ? AND stream_ordering < ?"
+            "SELECT coalesce(MIN(topological_ordering), 0) FROM events"
+            " WHERE room_id = ? AND stream_ordering >= ?"
         )
         row = await self.db_pool.execute(
-            "get_max_topological_token", None, sql, room_id, stream_key
+            "get_current_topological_token", None, sql, room_id, stream_key
         )
         return row[0][0] if row else 0
 

From cfb3096e33d8087a4b5214945d0b24295bb1c56a Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Thu, 1 Oct 2020 11:19:09 +0100
Subject: [PATCH 117/278] Revert federation-transaction-transmission backoff
 hacks

This reverts b852a8247, 15b2a5081, 28889d8da.

I don't think these patches are required any more, and if they are, they should
be on mainline, not hidden in our hotfixes branch. Let's try backing them out:
if that turns out to be an error, we can PR them properly.
---
 synapse/federation/sender/__init__.py         | 20 +------------------
 .../sender/per_destination_queue.py           | 15 --------------
 .../federation/sender/transaction_manager.py  |  4 ----
 3 files changed, 1 insertion(+), 38 deletions(-)

diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py
index b22869501cf..552519e82c7 100644
--- a/synapse/federation/sender/__init__.py
+++ b/synapse/federation/sender/__init__.py
@@ -151,25 +151,10 @@ def notify_new_events(self, current_id: int) -> None:
             "process_event_queue_for_federation", self._process_event_queue_loop
         )
 
-    async def _process_event_queue_loop(self):
-        loop_start_time = self.clock.time_msec()
+    async def _process_event_queue_loop(self) -> None:
         try:
             self._is_processing = True
             while True:
-                # if we've been going around this loop for a long time without
-                # catching up, deprioritise transaction transmission. This should mean
-                # that events get batched into fewer transactions, which is more
-                # efficient, and hence give us a chance to catch up
-                if (
-                    self.clock.time_msec() - loop_start_time > 60 * 1000
-                    and not self._transaction_manager.deprioritise_transmission
-                ):
-                    logger.warning(
-                        "Event queue is getting behind: deprioritising transaction "
-                        "transmission"
-                    )
-                    self._transaction_manager.deprioritise_transmission = True
-
                 last_token = await self.store.get_federation_out_pos("events")
                 next_token, events = await self.store.get_all_new_events_stream(
                     last_token, self._last_poked_id, limit=100
@@ -279,9 +264,6 @@ async def handle_room_events(events: Iterable[EventBase]) -> None:
 
         finally:
             self._is_processing = False
-            if self._transaction_manager.deprioritise_transmission:
-                logger.info("Event queue caught up: re-prioritising transmission")
-                self._transaction_manager.deprioritise_transmission = False
 
     def _send_pdu(self, pdu: EventBase, destinations: Iterable[str]) -> None:
         # We loop through all destinations to see whether we already have
diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py
index b4da52e7e6c..defc228c236 100644
--- a/synapse/federation/sender/per_destination_queue.py
+++ b/synapse/federation/sender/per_destination_queue.py
@@ -15,7 +15,6 @@
 # limitations under the License.
 import datetime
 import logging
-import random
 from typing import TYPE_CHECKING, Dict, Hashable, Iterable, List, Tuple
 
 from prometheus_client import Counter
@@ -40,8 +39,6 @@
 # This is defined in the Matrix spec and enforced by the receiver.
 MAX_EDUS_PER_TRANSACTION = 100
 
-DEPRIORITISE_SLEEP_TIME = 10
-
 logger = logging.getLogger(__name__)
 
 
@@ -223,18 +220,6 @@ async def _transaction_transmission_loop(self) -> None:
 
             pending_pdus = []
             while True:
-                if self._transaction_manager.deprioritise_transmission:
-                    # if the event-processing loop has got behind, sleep to give it
-                    # a chance to catch up. Add some randomness so that the transmitters
-                    # don't all wake up in sync.
-                    sleeptime = random.uniform(
-                        DEPRIORITISE_SLEEP_TIME, DEPRIORITISE_SLEEP_TIME * 2
-                    )
-                    logger.info(
-                        "TX [%s]: sleeping for %f seconds", self._destination, sleeptime
-                    )
-                    await self._clock.sleep(sleeptime)
-
                 # We have to keep 2 free slots for presence and rr_edus
                 limit = MAX_EDUS_PER_TRANSACTION - 2
 
diff --git a/synapse/federation/sender/transaction_manager.py b/synapse/federation/sender/transaction_manager.py
index cf472c9f151..c84072ab730 100644
--- a/synapse/federation/sender/transaction_manager.py
+++ b/synapse/federation/sender/transaction_manager.py
@@ -51,10 +51,6 @@ def __init__(self, hs: "synapse.server.HomeServer"):
         # HACK to get unique tx id
         self._next_txn_id = int(self.clock.time_msec())
 
-        # the federation sender sometimes sets this to delay transaction transmission,
-        # if the sender gets behind.
-        self.deprioritise_transmission = False
-
     @measure_func("_send_new_transaction")
     async def send_new_transaction(
         self, destination: str, pdus: List[EventBase], edus: List[Edu],

From ac6c5f198ea28d621063015b8f61295ec69a2edc Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Thu, 1 Oct 2020 11:31:07 +0100
Subject: [PATCH 118/278] Remove dangling changelog.d files

These result from PRs which were cherry-picked from release branches.
---
 changelog.d/6126.feature | 1 -
 changelog.d/6418.bugfix  | 1 -
 changelog.d/8349.bugfix  | 1 -
 changelog.d/8350.bugfix  | 1 -
 4 files changed, 4 deletions(-)
 delete mode 100644 changelog.d/6126.feature
 delete mode 100644 changelog.d/6418.bugfix
 delete mode 100644 changelog.d/8349.bugfix
 delete mode 100644 changelog.d/8350.bugfix

diff --git a/changelog.d/6126.feature b/changelog.d/6126.feature
deleted file mode 100644
index 1207ba6206a..00000000000
--- a/changelog.d/6126.feature
+++ /dev/null
@@ -1 +0,0 @@
-Group events into larger federation transactions at times of high traffic.
diff --git a/changelog.d/6418.bugfix b/changelog.d/6418.bugfix
deleted file mode 100644
index a1f488d3a22..00000000000
--- a/changelog.d/6418.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix phone home stats reporting.
diff --git a/changelog.d/8349.bugfix b/changelog.d/8349.bugfix
deleted file mode 100644
index cf2f531b148..00000000000
--- a/changelog.d/8349.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix a longstanding bug where back pagination over federation could get stuck if it failed to handle a received event.
diff --git a/changelog.d/8350.bugfix b/changelog.d/8350.bugfix
deleted file mode 100644
index 0e493c02821..00000000000
--- a/changelog.d/8350.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Partially mitigate bug where newly joined servers couldn't get past events in a room when there is a malformed event.
\ No newline at end of file

From 607367aeb1c9bf6366ebdf093d0bdba37891fe1b Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Thu, 1 Oct 2020 11:43:16 +0100
Subject: [PATCH 119/278] Fix typo in comment

I think this came from a bad merge
---
 synapse/storage/databases/main/monthly_active_users.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/storage/databases/main/monthly_active_users.py b/synapse/storage/databases/main/monthly_active_users.py
index 097a16cb2e1..e93aad33cd8 100644
--- a/synapse/storage/databases/main/monthly_active_users.py
+++ b/synapse/storage/databases/main/monthly_active_users.py
@@ -206,7 +206,7 @@ def _reap_users(txn, reserved_users):
                 # a least recently active basis.
                 # Note it is not possible to write this query using OFFSET due to
                 # incompatibilities in how sqlite and postgres support the feature.
-                # Sqlite requires 'LIMIT -1 OFFSET ?', the LIMIT must be presents,
+                # Sqlite requires 'LIMIT -1 OFFSET ?', the LIMIT must be present,
                 # while Postgres does not require 'LIMIT', but also does not support
                 # negative LIMIT values. So there is no way to write it that both can
                 # support

From e675bbcc49b04f9799bacb6e7f266573d8a117fc Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Thu, 1 Oct 2020 11:51:09 +0100
Subject: [PATCH 120/278] Remove redundant
 `EventCreationHandler._is_worker_app` attribute

This was added in 1c347c84bf/#7544 as a temporary optimisation. That was never
merged to develop, since it conflicted with #7492. The merge cf92310da forgot
to remove it.
---
 synapse/handlers/message.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 16c367b30b7..3a9183e0b00 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -412,10 +412,8 @@ def __init__(self, hs: "HomeServer"):
         if self._block_events_without_consent_error:
             self._consent_uri_builder = ConsentURIBuilder(self.config)
 
-        self._is_worker_app = self.config.worker_app is not None
-
         if (
-            not self._is_worker_app
+            not self.config.worker_app
             and self.config.cleanup_extremities_with_dummy_events
         ):
             self.clock.looping_call(

From 1c22954668b67b21b4741026193c3eee2f61b161 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Thu, 1 Oct 2020 12:10:55 +0100
Subject: [PATCH 121/278] Revert "Temporary fix to ensure kde can contact
 matrix.org if stuff breaks"

This reverts commit d90b0946ed775ca228895dd9f7e63bd16bed6391.

We believe this is no longer required.
---
 synapse/http/federation/well_known_resolver.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/synapse/http/federation/well_known_resolver.py b/synapse/http/federation/well_known_resolver.py
index fabe5d9939a..a306faa267c 100644
--- a/synapse/http/federation/well_known_resolver.py
+++ b/synapse/http/federation/well_known_resolver.py
@@ -110,10 +110,6 @@ async def get_well_known(self, server_name: bytes) -> WellKnownLookupResult:
         Returns:
             The result of the lookup
         """
-
-        if server_name == b"kde.org":
-            return WellKnownLookupResult(delegated_server=b"kde.modular.im:443")
-
         try:
             prev_result, expiry, ttl = self._well_known_cache.get_with_expiry(
                 server_name

From b0a463f758a71810227c8aa5d92023a549e6810a Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Thu, 1 Oct 2020 15:53:02 +0100
Subject: [PATCH 122/278] fix remote thumbnails?

---
 synapse/rest/media/v1/media_storage.py | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py
index 5681677fc93..4827cb2a3f5 100644
--- a/synapse/rest/media/v1/media_storage.py
+++ b/synapse/rest/media/v1/media_storage.py
@@ -143,12 +143,9 @@ async def fetch_media(self, file_info: FileInfo) -> Optional[Responder]:
         """
 
         path = self._file_info_to_path(file_info)
-        local_path = os.path.join(self.local_media_directory, path)
-        if os.path.exists(local_path):
-            return FileResponder(open(local_path, "rb"))
 
-        # Fallback for paths without method names
-        # Should be removed in the future
+        # fallback for remote thumbnails with no method in the filename
+        legacy_path = None
         if file_info.thumbnail and file_info.server_name:
             legacy_path = self.filepaths.remote_media_thumbnail_rel_legacy(
                 server_name=file_info.server_name,
@@ -157,8 +154,19 @@ async def fetch_media(self, file_info: FileInfo) -> Optional[Responder]:
                 height=file_info.thumbnail_height,
                 content_type=file_info.thumbnail_type,
             )
+
+        local_path = os.path.join(self.local_media_directory, path)
+        if os.path.exists(local_path):
+            logger.debug("responding with local file %s", local_path)
+            return FileResponder(open(local_path, "rb"))
+
+        if legacy_path:
+            logger.debug(
+                "local file %s did not exist; checking legacy name", local_path
+            )
             legacy_local_path = os.path.join(self.local_media_directory, legacy_path)
             if os.path.exists(legacy_local_path):
+                logger.debug("responding with local file %s", legacy_local_path)
                 return FileResponder(open(legacy_local_path, "rb"))
 
         for provider in self.storage_providers:
@@ -166,6 +174,14 @@ async def fetch_media(self, file_info: FileInfo) -> Optional[Responder]:
             if res:
                 logger.debug("Streaming %s from %s", path, provider)
                 return res
+            if legacy_path:
+                logger.debug(
+                    "Provider %s did not find %s; checking legacy name", provider, path
+                )
+                res = await provider.fetch(legacy_path, file_info)
+                if res:
+                    logger.debug("Streaming %s from %s", legacy_path, provider)
+                    return res
 
         return None
 

From 5ccc0785c1a4c18517582034b00526873d41e280 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Fri, 2 Oct 2020 12:30:49 +0100
Subject: [PATCH 123/278] Revert "fix remote thumbnails?"

This has now been fixed by a different commit (73d93039f).

This reverts commit b0a463f758a71810227c8aa5d92023a549e6810a.
---
 synapse/rest/media/v1/media_storage.py | 26 +++++---------------------
 1 file changed, 5 insertions(+), 21 deletions(-)

diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py
index 4827cb2a3f5..5681677fc93 100644
--- a/synapse/rest/media/v1/media_storage.py
+++ b/synapse/rest/media/v1/media_storage.py
@@ -143,9 +143,12 @@ async def fetch_media(self, file_info: FileInfo) -> Optional[Responder]:
         """
 
         path = self._file_info_to_path(file_info)
+        local_path = os.path.join(self.local_media_directory, path)
+        if os.path.exists(local_path):
+            return FileResponder(open(local_path, "rb"))
 
-        # fallback for remote thumbnails with no method in the filename
-        legacy_path = None
+        # Fallback for paths without method names
+        # Should be removed in the future
         if file_info.thumbnail and file_info.server_name:
             legacy_path = self.filepaths.remote_media_thumbnail_rel_legacy(
                 server_name=file_info.server_name,
@@ -154,19 +157,8 @@ async def fetch_media(self, file_info: FileInfo) -> Optional[Responder]:
                 height=file_info.thumbnail_height,
                 content_type=file_info.thumbnail_type,
             )
-
-        local_path = os.path.join(self.local_media_directory, path)
-        if os.path.exists(local_path):
-            logger.debug("responding with local file %s", local_path)
-            return FileResponder(open(local_path, "rb"))
-
-        if legacy_path:
-            logger.debug(
-                "local file %s did not exist; checking legacy name", local_path
-            )
             legacy_local_path = os.path.join(self.local_media_directory, legacy_path)
             if os.path.exists(legacy_local_path):
-                logger.debug("responding with local file %s", legacy_local_path)
                 return FileResponder(open(legacy_local_path, "rb"))
 
         for provider in self.storage_providers:
@@ -174,14 +166,6 @@ async def fetch_media(self, file_info: FileInfo) -> Optional[Responder]:
             if res:
                 logger.debug("Streaming %s from %s", path, provider)
                 return res
-            if legacy_path:
-                logger.debug(
-                    "Provider %s did not find %s; checking legacy name", provider, path
-                )
-                res = await provider.fetch(legacy_path, file_info)
-                if res:
-                    logger.debug("Streaming %s from %s", legacy_path, provider)
-                    return res
 
         return None
 

From cd2f831b9da91829efab01f31921fbd7fb738300 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Mon, 12 Oct 2020 19:09:30 +0100
Subject: [PATCH 124/278] block membership events from spammy freenode bridge

---
 synapse/rest/client/v1/room.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py
index b63389e5fed..862d05e3cae 100644
--- a/synapse/rest/client/v1/room.py
+++ b/synapse/rest/client/v1/room.py
@@ -291,6 +291,8 @@ def register(self, http_server):
 
     async def on_POST(self, request, room_identifier, txn_id=None):
         requester = await self.auth.get_user_by_req(request, allow_guest=True)
+        if requester.app_service and requester.app_service.id == "irc-freenode":
+            raise SynapseError(400, "too much spam")
 
         try:
             content = parse_json_object_from_request(request)
@@ -720,6 +722,8 @@ def register(self, http_server):
 
     async def on_POST(self, request, room_id, membership_action, txn_id=None):
         requester = await self.auth.get_user_by_req(request, allow_guest=True)
+        if requester.app_service and requester.app_service.id == "irc-freenode":
+            raise SynapseError(400, "too much spam")
 
         if requester.is_guest and membership_action not in {
             Membership.JOIN,

From bdbe2b12c216287708d7dce11c65c58cdb1d706b Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Tue, 13 Oct 2020 17:10:45 +0100
Subject: [PATCH 125/278] Revert "block membership events from spammy freenode
 bridge"

This reverts commit cd2f831b9da91829efab01f31921fbd7fb738300.
---
 synapse/rest/client/v1/room.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py
index d2349ecfdd0..00b43970822 100644
--- a/synapse/rest/client/v1/room.py
+++ b/synapse/rest/client/v1/room.py
@@ -290,8 +290,6 @@ def register(self, http_server):
 
     async def on_POST(self, request, room_identifier, txn_id=None):
         requester = await self.auth.get_user_by_req(request, allow_guest=True)
-        if requester.app_service and requester.app_service.id == "irc-freenode":
-            raise SynapseError(400, "too much spam")
 
         try:
             content = parse_json_object_from_request(request)
@@ -721,8 +719,6 @@ def register(self, http_server):
 
     async def on_POST(self, request, room_id, membership_action, txn_id=None):
         requester = await self.auth.get_user_by_req(request, allow_guest=True)
-        if requester.app_service and requester.app_service.id == "irc-freenode":
-            raise SynapseError(400, "too much spam")
 
         if requester.is_guest and membership_action not in {
             Membership.JOIN,

From d60af9305a07fadcf0270d1887c5b7d063834967 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Wed, 28 Oct 2020 11:58:47 +0000
Subject: [PATCH 126/278] Patch to temporarily drop cross-user
 m.key_share_requests (#8675)

Cross-user `m.key_share_requests` are a relatively new kind of `to_device` message that allows a user to re-request session keys for a message from another user if they were otherwise unable to retrieve them.

Unfortunately, these have caused performance problems on matrix.org. This is a temporary patch to disable them while we investigate a better solution.
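
For context, a cross-user key share request is just an `m.room_key_request` to-device message addressed to another user. A rough sketch of its shape (field names per the Matrix client-server spec; all values below are made up) and of the kind of check this patch adds:

```python
# Illustrative only: approximate shape of an m.room_key_request to-device
# message, with made-up values.
key_share_request = {
    "type": "m.room_key_request",
    "content": {
        "action": "request",
        "requesting_device_id": "EXAMPLEDEVICE",
        "request_id": "1495474790150.19",
        "body": {
            "algorithm": "m.megolm.v1.aes-sha2",
            "room_id": "!example:example.org",
            "sender_key": "<curve25519 key of the original sender>",
            "session_id": "<megolm session id>",
        },
    },
}


def should_drop(message_type: str, sender_user_id: str, target_user_id: str) -> bool:
    # Hypothetical helper mirroring the temporary patch: drop key share
    # requests whenever the sender and the target are different users.
    return message_type == "m.room_key_request" and target_user_id != sender_user_id
```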
---
 changelog.d/8675.misc                   | 1 +
 synapse/federation/federation_server.py | 4 ++++
 synapse/handlers/devicemessage.py       | 4 ++++
 3 files changed, 9 insertions(+)
 create mode 100644 changelog.d/8675.misc

diff --git a/changelog.d/8675.misc b/changelog.d/8675.misc
new file mode 100644
index 00000000000..7ffe38b7d9b
--- /dev/null
+++ b/changelog.d/8675.misc
@@ -0,0 +1 @@
+Temporarily drop cross-user m.room_key_request to_device messages over performance concerns.
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index 23278e36b73..b7459a1d87a 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -915,6 +915,10 @@ async def on_edu(self, edu_type: str, origin: str, content: dict):
         if not self.config.use_presence and edu_type == "m.presence":
             return
 
+        # Temporary patch to drop cross-user key share requests
+        if edu_type == "m.room_key_request":
+            return
+
         # Check if we have a handler on this instance
         handler = self.edu_handlers.get(edu_type)
         if handler:
diff --git a/synapse/handlers/devicemessage.py b/synapse/handlers/devicemessage.py
index 9cac5a84639..5aa56013a4b 100644
--- a/synapse/handlers/devicemessage.py
+++ b/synapse/handlers/devicemessage.py
@@ -153,6 +153,10 @@ async def send_device_message(
         local_messages = {}
         remote_messages = {}  # type: Dict[str, Dict[str, Dict[str, JsonDict]]]
         for user_id, by_device in messages.items():
+            # Temporary patch to disable sending local cross-user key requests.
+            if message_type == "m.room_key_request" and user_id != sender_user_id:
+                continue
+
             # we use UserID.from_string to catch invalid user ids
             if self.is_mine(UserID.from_string(user_id)):
                 messages_by_device = {

From 8373e6254f683e4649a941f951d30b1255237264 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Sat, 31 Oct 2020 10:50:28 +0000
Subject: [PATCH 127/278] Fix SIGHUP handler

Fixes:

```
builtins.TypeError: _reload_logging_config() takes 1 positional argument but 2 were given
```
---
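
A minimal standalone sketch of the callback registry after this fix (hypothetical names, not the Synapse code itself): callbacks are invoked with exactly the arguments they were registered with, and anything that needs the homeserver now gets it passed explicitly at registration time.

```python
# Hypothetical sketch of the SIGHUP callback pattern after this fix; the real
# code is in synapse/app/_base.py.
_sighup_callbacks = []


def register_sighup(func, *args, **kwargs):
    # Remember the callback together with the args it should be called with.
    _sighup_callbacks.append((func, args, kwargs))


def handle_sighup():
    # No implicit homeserver argument is prepended any more.
    for func, args, kwargs in _sighup_callbacks:
        func(*args, **kwargs)


def refresh_certificate(hs):
    print("reloading certificate for", hs)


def reload_logging_config():
    print("reloading logging config")


register_sighup(refresh_certificate, "my-homeserver")  # needs hs: pass it here
register_sighup(reload_logging_config)  # takes no extra arguments
handle_sighup()
```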
---
 changelog.d/8697.misc | 1 +
 synapse/app/_base.py  | 5 ++---
 2 files changed, 3 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/8697.misc

diff --git a/changelog.d/8697.misc b/changelog.d/8697.misc
new file mode 100644
index 00000000000..7982a4e46db
--- /dev/null
+++ b/changelog.d/8697.misc
@@ -0,0 +1 @@
+ Re-organize the structured logging code to separate the TCP transport handling from the JSON formatting.
diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index f6f7b2bf42c..9c8dc785c66 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -49,7 +49,6 @@ def register_sighup(func, *args, **kwargs):
 
     Args:
         func (function): Function to be called when sent a SIGHUP signal.
-            Will be called with a single default argument, the homeserver.
         *args, **kwargs: args and kwargs to be passed to the target function.
     """
     _sighup_callbacks.append((func, args, kwargs))
@@ -251,13 +250,13 @@ def handle_sighup(*args, **kwargs):
                 sdnotify(b"RELOADING=1")
 
                 for i, args, kwargs in _sighup_callbacks:
-                    i(hs, *args, **kwargs)
+                    i(*args, **kwargs)
 
                 sdnotify(b"READY=1")
 
             signal.signal(signal.SIGHUP, handle_sighup)
 
-            register_sighup(refresh_certificate)
+            register_sighup(refresh_certificate, hs)
 
         # Load the certificate from disk.
         refresh_certificate(hs)

From f4f65f4e992cf27ca76be96af1831f9dc41f4759 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 29 Dec 2020 07:40:12 -0500
Subject: [PATCH 128/278] Allow redacting events on workers (#8994)

Adds the redacts endpoint to workers that have the client listener.
---
 changelog.d/8994.feature       |  1 +
 docs/workers.md                |  1 +
 synapse/app/generic_worker.py  | 31 ++++---------------------------
 synapse/rest/client/v1/room.py | 17 ++++++++++-------
 4 files changed, 16 insertions(+), 34 deletions(-)
 create mode 100644 changelog.d/8994.feature

diff --git a/changelog.d/8994.feature b/changelog.d/8994.feature
new file mode 100644
index 00000000000..76aeb185cb6
--- /dev/null
+++ b/changelog.d/8994.feature
@@ -0,0 +1 @@
+Allow running the redact endpoint on workers.
diff --git a/docs/workers.md b/docs/workers.md
index efe97af31a0..298adf8695c 100644
--- a/docs/workers.md
+++ b/docs/workers.md
@@ -229,6 +229,7 @@ expressions:
     ^/_matrix/client/(r0|unstable)/auth/.*/fallback/web$
 
     # Event sending requests
+    ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/redact
     ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/send
     ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/state/
     ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/(join|invite|leave|ban|unban|kick)$
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index aa12c74358a..fa23d9bb20e 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -89,7 +89,7 @@
     ToDeviceStream,
 )
 from synapse.rest.admin import register_servlets_for_media_repo
-from synapse.rest.client.v1 import events
+from synapse.rest.client.v1 import events, room
 from synapse.rest.client.v1.initial_sync import InitialSyncRestServlet
 from synapse.rest.client.v1.login import LoginRestServlet
 from synapse.rest.client.v1.profile import (
@@ -98,20 +98,6 @@
     ProfileRestServlet,
 )
 from synapse.rest.client.v1.push_rule import PushRuleRestServlet
-from synapse.rest.client.v1.room import (
-    JoinedRoomMemberListRestServlet,
-    JoinRoomAliasServlet,
-    PublicRoomListRestServlet,
-    RoomEventContextServlet,
-    RoomInitialSyncRestServlet,
-    RoomMemberListRestServlet,
-    RoomMembershipRestServlet,
-    RoomMessageListRestServlet,
-    RoomSendEventRestServlet,
-    RoomStateEventRestServlet,
-    RoomStateRestServlet,
-    RoomTypingRestServlet,
-)
 from synapse.rest.client.v1.voip import VoipRestServlet
 from synapse.rest.client.v2_alpha import groups, sync, user_directory
 from synapse.rest.client.v2_alpha._base import client_patterns
@@ -512,12 +498,6 @@ def _listen_http(self, listener_config: ListenerConfig):
                 elif name == "client":
                     resource = JsonResource(self, canonical_json=False)
 
-                    PublicRoomListRestServlet(self).register(resource)
-                    RoomMemberListRestServlet(self).register(resource)
-                    JoinedRoomMemberListRestServlet(self).register(resource)
-                    RoomStateRestServlet(self).register(resource)
-                    RoomEventContextServlet(self).register(resource)
-                    RoomMessageListRestServlet(self).register(resource)
                     RegisterRestServlet(self).register(resource)
                     LoginRestServlet(self).register(resource)
                     ThreepidRestServlet(self).register(resource)
@@ -526,22 +506,19 @@ def _listen_http(self, listener_config: ListenerConfig):
                     VoipRestServlet(self).register(resource)
                     PushRuleRestServlet(self).register(resource)
                     VersionsRestServlet(self).register(resource)
-                    RoomSendEventRestServlet(self).register(resource)
-                    RoomMembershipRestServlet(self).register(resource)
-                    RoomStateEventRestServlet(self).register(resource)
-                    JoinRoomAliasServlet(self).register(resource)
+
                     ProfileAvatarURLRestServlet(self).register(resource)
                     ProfileDisplaynameRestServlet(self).register(resource)
                     ProfileRestServlet(self).register(resource)
                     KeyUploadServlet(self).register(resource)
                     AccountDataServlet(self).register(resource)
                     RoomAccountDataServlet(self).register(resource)
-                    RoomTypingRestServlet(self).register(resource)
 
                     sync.register_servlets(self, resource)
                     events.register_servlets(self, resource)
+                    room.register_servlets(self, resource, True)
+                    room.register_deprecated_servlets(self, resource)
                     InitialSyncRestServlet(self).register(resource)
-                    RoomInitialSyncRestServlet(self).register(resource)
 
                     user_directory.register_servlets(self, resource)
 
diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py
index 93c06afe270..5647e8c5777 100644
--- a/synapse/rest/client/v1/room.py
+++ b/synapse/rest/client/v1/room.py
@@ -963,25 +963,28 @@ def register_txn_path(servlet, regex_string, http_server, with_get=False):
         )
 
 
-def register_servlets(hs, http_server):
+def register_servlets(hs, http_server, is_worker=False):
     RoomStateEventRestServlet(hs).register(http_server)
-    RoomCreateRestServlet(hs).register(http_server)
     RoomMemberListRestServlet(hs).register(http_server)
     JoinedRoomMemberListRestServlet(hs).register(http_server)
     RoomMessageListRestServlet(hs).register(http_server)
     JoinRoomAliasServlet(hs).register(http_server)
-    RoomForgetRestServlet(hs).register(http_server)
     RoomMembershipRestServlet(hs).register(http_server)
     RoomSendEventRestServlet(hs).register(http_server)
     PublicRoomListRestServlet(hs).register(http_server)
     RoomStateRestServlet(hs).register(http_server)
     RoomRedactEventRestServlet(hs).register(http_server)
     RoomTypingRestServlet(hs).register(http_server)
-    SearchRestServlet(hs).register(http_server)
-    JoinedRoomsRestServlet(hs).register(http_server)
-    RoomEventServlet(hs).register(http_server)
     RoomEventContextServlet(hs).register(http_server)
-    RoomAliasListServlet(hs).register(http_server)
+
+    # Some servlets only get registered for the main process.
+    if not is_worker:
+        RoomCreateRestServlet(hs).register(http_server)
+        RoomForgetRestServlet(hs).register(http_server)
+        SearchRestServlet(hs).register(http_server)
+        JoinedRoomsRestServlet(hs).register(http_server)
+        RoomEventServlet(hs).register(http_server)
+        RoomAliasListServlet(hs).register(http_server)
 
 
 def register_deprecated_servlets(hs, http_server):

From 5a4f09228d893fcc164ae47654446ff723ba1ccf Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Tue, 5 Jan 2021 13:52:36 +0000
Subject: [PATCH 129/278] Remove cache from room directory query results

This reverts a285fe0. Hopefully the cache is no longer required, thanks to
---
 synapse/handlers/room_list.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py
index a2c0340a3c1..70522e40fab 100644
--- a/synapse/handlers/room_list.py
+++ b/synapse/handlers/room_list.py
@@ -45,7 +45,7 @@ def __init__(self, hs: "HomeServer"):
         self.enable_room_list_search = hs.config.enable_room_list_search
 
         self.response_cache = ResponseCache(
-            hs, "room_list", timeout_ms=10 * 60 * 1000
+            hs, "room_list"
         )  # type: ResponseCache[Tuple[Optional[int], Optional[str], ThirdPartyInstanceID]]
         self.remote_response_cache = ResponseCache(
             hs, "remote_room_list", timeout_ms=30 * 1000

From 3f6530ed5575a9ea574e026dbd4fe59caf3afebe Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Sat, 6 Feb 2021 11:02:53 +0000
Subject: [PATCH 130/278] block groups requests to fosdem

---
 synapse/handlers/groups_local.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/synapse/handlers/groups_local.py b/synapse/handlers/groups_local.py
index 71f11ef94aa..df040a675e1 100644
--- a/synapse/handlers/groups_local.py
+++ b/synapse/handlers/groups_local.py
@@ -42,6 +42,9 @@ async def f(self, group_id, *args, **kwargs):
         else:
             destination = get_domain_from_id(group_id)
 
+            if destination == "fosdem.org":
+                raise SynapseError(502, "Failed to contact group server")
+
             try:
                 return await getattr(self.transport_client, func_name)(
                     destination, group_id, *args, **kwargs

From 844b3e3f65eb5d6fb03f5ee94ef70befeffcdd6b Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Sat, 6 Feb 2021 12:03:46 +0000
Subject: [PATCH 131/278] Revert "block groups requests to fosdem"

This reverts commit 3f6530ed5575a9ea574e026dbd4fe59caf3afebe.
---
 synapse/handlers/groups_local.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/synapse/handlers/groups_local.py b/synapse/handlers/groups_local.py
index df040a675e1..71f11ef94aa 100644
--- a/synapse/handlers/groups_local.py
+++ b/synapse/handlers/groups_local.py
@@ -42,9 +42,6 @@ async def f(self, group_id, *args, **kwargs):
         else:
             destination = get_domain_from_id(group_id)
 
-            if destination == "fosdem.org":
-                raise SynapseError(502, "Failed to contact group server")
-
             try:
                 return await getattr(self.transport_client, func_name)(
                     destination, group_id, *args, **kwargs

From a4aa56a0eb46ab43683e2569d8b7ca52f9715afa Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 11 Feb 2021 16:06:29 +0000
Subject: [PATCH 132/278] Ensure that we never stop reconnecting to redis
 (#9391)

---
 changelog.d/9391.bugfix          |  1 +
 synapse/replication/tcp/redis.py | 26 ++++++++++++++++++++++++--
 2 files changed, 25 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/9391.bugfix

diff --git a/changelog.d/9391.bugfix b/changelog.d/9391.bugfix
new file mode 100644
index 00000000000..b5e68e2ac77
--- /dev/null
+++ b/changelog.d/9391.bugfix
@@ -0,0 +1 @@
+Fix bug where Synapse would occasionally stop reconnecting after the connection was lost.
diff --git a/synapse/replication/tcp/redis.py b/synapse/replication/tcp/redis.py
index fdd087683b1..89f8af0f364 100644
--- a/synapse/replication/tcp/redis.py
+++ b/synapse/replication/tcp/redis.py
@@ -15,8 +15,9 @@
 
 import logging
 from inspect import isawaitable
-from typing import TYPE_CHECKING, Optional, Type, cast
+from typing import TYPE_CHECKING, Generic, Optional, Type, TypeVar, cast
 
+import attr
 import txredisapi
 
 from synapse.logging.context import PreserveLoggingContext, make_deferred_yieldable
@@ -42,6 +43,24 @@
 
 logger = logging.getLogger(__name__)
 
+T = TypeVar("T")
+V = TypeVar("V")
+
+
+@attr.s
+class ConstantProperty(Generic[T, V]):
+    """A descriptor that returns the given constant, ignoring attempts to set
+    it.
+    """
+
+    constant = attr.ib()  # type: V
+
+    def __get__(self, obj: Optional[T], objtype: Type[T] = None) -> V:
+        return self.constant
+
+    def __set__(self, obj: Optional[T], value: V):
+        pass
+
 
 class RedisSubscriber(txredisapi.SubscriberProtocol, AbstractConnection):
     """Connection to redis subscribed to replication stream.
@@ -195,6 +214,10 @@ class SynapseRedisFactory(txredisapi.RedisFactory):
     we detect dead connections.
     """
 
+    # We want to *always* retry connecting, txredisapi will stop if there is a
+    # failure during certain operations, e.g. during AUTH.
+    continueTrying = cast(bool, ConstantProperty(True))
+
     def __init__(
         self,
         hs: "HomeServer",
@@ -243,7 +266,6 @@ class RedisDirectTcpReplicationClientFactory(SynapseRedisFactory):
     """
 
     maxDelay = 5
-    continueTrying = True
     protocol = RedisSubscriber
 
     def __init__(

From 5ee8a1c50a1b571a8a8704a59635232193b454f2 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Thu, 18 Feb 2021 14:01:23 +0000
Subject: [PATCH 133/278] Redirect redirect requests if they arrive on the
 wrong URI

---
 synapse/rest/client/v1/login.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py
index 6e2fbedd99b..3e6a21e20ff 100644
--- a/synapse/rest/client/v1/login.py
+++ b/synapse/rest/client/v1/login.py
@@ -354,6 +354,7 @@ def __init__(self, hs: "HomeServer"):
             hs.get_oidc_handler()
         self._sso_handler = hs.get_sso_handler()
         self._msc2858_enabled = hs.config.experimental.msc2858_enabled
+        self._public_baseurl = hs.config.public_baseurl
 
     def register(self, http_server: HttpServer) -> None:
         super().register(http_server)
@@ -373,6 +374,28 @@ def register(self, http_server: HttpServer) -> None:
     async def on_GET(
         self, request: SynapseRequest, idp_id: Optional[str] = None
     ) -> None:
+        if not self._public_baseurl:
+            raise SynapseError(400, "SSO requires a valid public_baseurl")
+
+        # if this isn't the expected hostname, redirect to the right one, so that we
+        # get our cookies back.
+        requested_uri = b"%s://%s%s" % (
+            b"https" if request.isSecure() else b"http",
+            request.getHeader(b"host"),
+            request.uri,
+        )
+        baseurl_bytes = self._public_baseurl.encode("utf-8")
+        if not requested_uri.startswith(baseurl_bytes):
+            i = requested_uri.index(b"/_matrix")
+            new_uri = baseurl_bytes[:-1] + requested_uri[i:]
+            logger.info(
+                "Requested URI %s is not canonical: redirecting to %s",
+                requested_uri.decode("utf-8", errors="replace"),
+                new_uri.decode("utf-8", errors="replace"),
+            )
+            request.redirect(new_uri)
+            finish_request(request)
+
         client_redirect_url = parse_string(
             request, "redirectUrl", required=True, encoding=None
         )

From 47d2b49e2b938a1c0c2e13830505a6d019ee65fe Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Thu, 18 Feb 2021 14:29:48 +0000
Subject: [PATCH 134/278] more login hacking

---
 synapse/http/site.py | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/synapse/http/site.py b/synapse/http/site.py
index 4a4fb5ef264..7421c172e48 100644
--- a/synapse/http/site.py
+++ b/synapse/http/site.py
@@ -333,14 +333,28 @@ def _should_log_request(self) -> bool:
 
 
 class XForwardedForRequest(SynapseRequest):
-    def __init__(self, *args, **kw):
-        SynapseRequest.__init__(self, *args, **kw)
-
     """
     Add a layer on top of another request that only uses the value of an
     X-Forwarded-For header as the result of C{getClientIP}.
+
+    XXX: I think the right way to do this is with request.setHost().
     """
 
+    def __init__(self, *args, **kw):
+        SynapseRequest.__init__(self, *args, **kw)
+
+        forwarded_header = self.getHeader(b"x-forwarded-proto")
+        if forwarded_header is not None:
+            self._is_secure = forwarded_header.lower() == b"https"
+        else:
+            logger.warning(
+                "received request lacks an x-forwarded-proto header: assuming https"
+            )
+            self._is_secure = True
+
+    def isSecure(self):
+        return self._is_secure
+
     def getClientIP(self):
         """
         @return: The client address (the first address) in the value of the

From c7934aee2caaed19200450cea1e94c2969b78a26 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Fri, 26 Feb 2021 14:04:05 +0000
Subject: [PATCH 135/278] Revert "more login hacking"

This reverts commit 47d2b49e2b938a1c0c2e13830505a6d019ee65fe.

This has now been superseded on develop by PR 9472.
---
 synapse/http/site.py | 20 +++-----------------
 1 file changed, 3 insertions(+), 17 deletions(-)

diff --git a/synapse/http/site.py b/synapse/http/site.py
index 7421c172e48..4a4fb5ef264 100644
--- a/synapse/http/site.py
+++ b/synapse/http/site.py
@@ -333,28 +333,14 @@ def _should_log_request(self) -> bool:
 
 
 class XForwardedForRequest(SynapseRequest):
+    def __init__(self, *args, **kw):
+        SynapseRequest.__init__(self, *args, **kw)
+
     """
     Add a layer on top of another request that only uses the value of an
     X-Forwarded-For header as the result of C{getClientIP}.
-
-    XXX: I think the right way to do this is with request.setHost().
     """
 
-    def __init__(self, *args, **kw):
-        SynapseRequest.__init__(self, *args, **kw)
-
-        forwarded_header = self.getHeader(b"x-forwarded-proto")
-        if forwarded_header is not None:
-            self._is_secure = forwarded_header.lower() == b"https"
-        else:
-            logger.warning(
-                "received request lacks an x-forwarded-proto header: assuming https"
-            )
-            self._is_secure = True
-
-    def isSecure(self):
-        return self._is_secure
-
     def getClientIP(self):
         """
         @return: The client address (the first address) in the value of the

From 0e56f02d5d8346f9bacd449d25c5ff7921943ef0 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Fri, 26 Feb 2021 14:05:00 +0000
Subject: [PATCH 136/278] Revert "Redirect redirect requests if they arrive on
 the wrong URI"

This reverts commit 5ee8a1c50a1b571a8a8704a59635232193b454f2.

This has now been superseded on develop by PR #9436.
---
 synapse/rest/client/v1/login.py | 23 -----------------------
 1 file changed, 23 deletions(-)

diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py
index 3e6a21e20ff..6e2fbedd99b 100644
--- a/synapse/rest/client/v1/login.py
+++ b/synapse/rest/client/v1/login.py
@@ -354,7 +354,6 @@ def __init__(self, hs: "HomeServer"):
             hs.get_oidc_handler()
         self._sso_handler = hs.get_sso_handler()
         self._msc2858_enabled = hs.config.experimental.msc2858_enabled
-        self._public_baseurl = hs.config.public_baseurl
 
     def register(self, http_server: HttpServer) -> None:
         super().register(http_server)
@@ -374,28 +373,6 @@ def register(self, http_server: HttpServer) -> None:
     async def on_GET(
         self, request: SynapseRequest, idp_id: Optional[str] = None
     ) -> None:
-        if not self._public_baseurl:
-            raise SynapseError(400, "SSO requires a valid public_baseurl")
-
-        # if this isn't the expected hostname, redirect to the right one, so that we
-        # get our cookies back.
-        requested_uri = b"%s://%s%s" % (
-            b"https" if request.isSecure() else b"http",
-            request.getHeader(b"host"),
-            request.uri,
-        )
-        baseurl_bytes = self._public_baseurl.encode("utf-8")
-        if not requested_uri.startswith(baseurl_bytes):
-            i = requested_uri.index(b"/_matrix")
-            new_uri = baseurl_bytes[:-1] + requested_uri[i:]
-            logger.info(
-                "Requested URI %s is not canonical: redirecting to %s",
-                requested_uri.decode("utf-8", errors="replace"),
-                new_uri.decode("utf-8", errors="replace"),
-            )
-            request.redirect(new_uri)
-            finish_request(request)
-
         client_redirect_url = parse_string(
             request, "redirectUrl", required=True, encoding=None
         )

From 6557eba7dc63e50e81c07805770a09ac856233b0 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <andrew@amorgan.xyz>
Date: Tue, 23 Mar 2021 10:24:03 +0000
Subject: [PATCH 137/278] Revert "Patch to temporarily drop cross-user
 m.key_share_requests (#8675)"

This reverts commit d60af9305a07fadcf0270d1887c5b7d063834967.
---
 changelog.d/8675.misc                   | 1 -
 synapse/federation/federation_server.py | 4 ----
 2 files changed, 5 deletions(-)
 delete mode 100644 changelog.d/8675.misc

diff --git a/changelog.d/8675.misc b/changelog.d/8675.misc
deleted file mode 100644
index 7ffe38b7d9b..00000000000
--- a/changelog.d/8675.misc
+++ /dev/null
@@ -1 +0,0 @@
-Temporarily drop cross-user m.room_key_request to_device messages over performance concerns.
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index 98caf2a1a44..9839d3d0160 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -955,10 +955,6 @@ async def on_edu(self, edu_type: str, origin: str, content: dict):
         ):
             return
 
-        # Temporary patch to drop cross-user key share requests
-        if edu_type == "m.room_key_request":
-            return
-
         # Check if we have a handler on this instance
         handler = self.edu_handlers.get(edu_type)
         if handler:

From 05ec9e8d37540711ef50e85e6f7bc5fa13ec8f55 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Tue, 23 Mar 2021 10:53:05 +0000
Subject: [PATCH 138/278] Revert "Patch to temporarily drop cross-user
 m.key_share_requests (#8675)" (#9668)

We patched `matrix-org-hotfixes` a little while ago in #8675 to drop any cross-user key share requests while they were being accidentally spammed by a client. This was a temporary fix until we had some rate-limiting in place.

Rate-limiting landed in https://github.com/matrix-org/synapse/pull/8957. Note that the rate-limit can't be configured, but has what appear to be [sensible defaults](https://github.com/matrix-org/synapse/blob/db2efa9c50569adbfab102b1f447f5a8312b95f3/synapse/config/ratelimiting.py#L105-L113).

Note that the original patch had already been partially overridden when the rate-limit PR landed, as the two conflicted. So we've already lifted the restriction between local devices on matrix.org, but requests were still blocked from being sent over federation. This PR cleans up the remaining bits.

This reverts commit d60af9305a07fadcf0270d1887c5b7d063834967.
---
 changelog.d/8675.misc                   | 1 -
 synapse/federation/federation_server.py | 4 ----
 2 files changed, 5 deletions(-)
 delete mode 100644 changelog.d/8675.misc

diff --git a/changelog.d/8675.misc b/changelog.d/8675.misc
deleted file mode 100644
index 7ffe38b7d9b..00000000000
--- a/changelog.d/8675.misc
+++ /dev/null
@@ -1 +0,0 @@
-Temporarily drop cross-user m.room_key_request to_device messages over performance concerns.
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index 98caf2a1a44..9839d3d0160 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -955,10 +955,6 @@ async def on_edu(self, edu_type: str, origin: str, content: dict):
         ):
             return
 
-        # Temporary patch to drop cross-user key share requests
-        if edu_type == "m.room_key_request":
-            return
-
         # Check if we have a handler on this instance
         handler = self.edu_handlers.get(edu_type)
         if handler:

From edac710bc0c4dc1cd226d9ffe73a00b42c2b67d8 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Mon, 19 Apr 2021 18:56:53 +0100
Subject: [PATCH 139/278] improve efficiency of _glob_to_re

---
 synapse/push/push_rule_evaluator.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/synapse/push/push_rule_evaluator.py b/synapse/push/push_rule_evaluator.py
index 49ecb38522a..ae077af5b5c 100644
--- a/synapse/push/push_rule_evaluator.py
+++ b/synapse/push/push_rule_evaluator.py
@@ -230,7 +230,8 @@ def _glob_to_re(glob: str, word_boundary: bool) -> Pattern:
     if IS_GLOB.search(glob):
         r = re.escape(glob)
 
-        r = r.replace(r"\*", ".*?")
+        # replace 1 or more repeats of `\*` with `.*?`
+        r = re.sub(r"(\\\*)+", ".*?", r)
         r = r.replace(r"\?", ".")
 
         # handle [abc], [a-z] and [!a-z] style ranges.

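A quick illustration of what this buys (standard-library `re` only): after `re.escape`, each `*` in the glob becomes `\*`, so the old per-occurrence replace turned a run of asterisks into a run of adjacent lazy wildcards, while the new substitution collapses the whole run into a single `.*?`.

```python
import re

glob = "**foo**"
escaped = re.escape(glob)                 # contains r'\*\*foo\*\*'

old = escaped.replace(r"\*", ".*?")       # one wildcard per escaped asterisk
new = re.sub(r"(\\\*)+", ".*?", escaped)  # one wildcard per run of asterisks

assert old == ".*?.*?foo.*?.*?"
assert new == ".*?foo.*?"
# Fewer adjacent lazy wildcards means far less backtracking when the
# compiled pattern is later matched against message bodies.
```
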
From 9979fef4fef1220515c4d1916c70cd461984f035 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <andrew@amorgan.xyz>
Date: Tue, 27 Apr 2021 13:47:39 +0100
Subject: [PATCH 140/278] Revert "Experimental Federation Speedup (#9702)"

This reverts commit 05e8c70c059f8ebb066e029bc3aa3e0cefef1019.
---
 changelog.d/9702.misc                         |   1 -
 contrib/experiments/test_messaging.py         |  42 +++---
 synapse/federation/sender/__init__.py         | 137 +++++++-----------
 .../sender/per_destination_queue.py           |  15 +-
 .../storage/databases/main/transactions.py    |  28 ++--
 5 files changed, 95 insertions(+), 128 deletions(-)
 delete mode 100644 changelog.d/9702.misc

diff --git a/changelog.d/9702.misc b/changelog.d/9702.misc
deleted file mode 100644
index c6e63450a97..00000000000
--- a/changelog.d/9702.misc
+++ /dev/null
@@ -1 +0,0 @@
-Speed up federation transmission by using fewer database calls. Contributed by @ShadowJonathan.
diff --git a/contrib/experiments/test_messaging.py b/contrib/experiments/test_messaging.py
index 5dd172052b9..31b8a682250 100644
--- a/contrib/experiments/test_messaging.py
+++ b/contrib/experiments/test_messaging.py
@@ -224,16 +224,14 @@ def send_message(self, room_name, sender, body):
         destinations = yield self.get_servers_for_context(room_name)
 
         try:
-            yield self.replication_layer.send_pdus(
-                [
-                    Pdu.create_new(
-                        context=room_name,
-                        pdu_type="sy.room.message",
-                        content={"sender": sender, "body": body},
-                        origin=self.server_name,
-                        destinations=destinations,
-                    )
-                ]
+            yield self.replication_layer.send_pdu(
+                Pdu.create_new(
+                    context=room_name,
+                    pdu_type="sy.room.message",
+                    content={"sender": sender, "body": body},
+                    origin=self.server_name,
+                    destinations=destinations,
+                )
             )
         except Exception as e:
             logger.exception(e)
@@ -255,7 +253,7 @@ def join_room(self, room_name, sender, joinee):
                 origin=self.server_name,
                 destinations=destinations,
             )
-            yield self.replication_layer.send_pdus([pdu])
+            yield self.replication_layer.send_pdu(pdu)
         except Exception as e:
             logger.exception(e)
 
@@ -267,18 +265,16 @@ def invite_to_room(self, room_name, sender, invitee):
         destinations = yield self.get_servers_for_context(room_name)
 
         try:
-            yield self.replication_layer.send_pdus(
-                [
-                    Pdu.create_new(
-                        context=room_name,
-                        is_state=True,
-                        pdu_type="sy.room.member",
-                        state_key=invitee,
-                        content={"membership": "invite"},
-                        origin=self.server_name,
-                        destinations=destinations,
-                    )
-                ]
+            yield self.replication_layer.send_pdu(
+                Pdu.create_new(
+                    context=room_name,
+                    is_state=True,
+                    pdu_type="sy.room.member",
+                    state_key=invitee,
+                    content={"membership": "invite"},
+                    origin=self.server_name,
+                    destinations=destinations,
+                )
             )
         except Exception as e:
             logger.exception(e)
diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py
index 022bbf7dad4..088260c2e9b 100644
--- a/synapse/federation/sender/__init__.py
+++ b/synapse/federation/sender/__init__.py
@@ -16,7 +16,6 @@
 import logging
 from typing import (
     TYPE_CHECKING,
-    Collection,
     Dict,
     Hashable,
     Iterable,
@@ -28,12 +27,18 @@
 
 from prometheus_client import Counter
 
+from twisted.internet import defer
+
 import synapse.metrics
 from synapse.api.presence import UserPresenceState
 from synapse.events import EventBase
 from synapse.federation.sender.per_destination_queue import PerDestinationQueue
 from synapse.federation.sender.transaction_manager import TransactionManager
 from synapse.federation.units import Edu
+from synapse.logging.context import (
+    make_deferred_yieldable,
+    run_in_background,
+)
 from synapse.metrics import (
     LaterGauge,
     event_processing_loop_counter,
@@ -262,27 +267,15 @@ async def _process_event_queue_loop(self) -> None:
                 if not events and next_token >= self._last_poked_id:
                     break
 
-                async def get_destinations_for_event(
-                    event: EventBase,
-                ) -> Collection[str]:
-                    """Computes the destinations to which this event must be sent.
-
-                    This returns an empty tuple when there are no destinations to send to,
-                    or if this event is not from this homeserver and it is not sending
-                    it on behalf of another server.
-
-                    Will also filter out destinations which this sender is not responsible for,
-                    if multiple federation senders exist.
-                    """
-
+                async def handle_event(event: EventBase) -> None:
                     # Only send events for this server.
                     send_on_behalf_of = event.internal_metadata.get_send_on_behalf_of()
                     is_mine = self.is_mine_id(event.sender)
                     if not is_mine and send_on_behalf_of is None:
-                        return ()
+                        return
 
                     if not event.internal_metadata.should_proactively_send():
-                        return ()
+                        return
 
                     destinations = None  # type: Optional[Set[str]]
                     if not event.prev_event_ids():
@@ -317,7 +310,7 @@ async def get_destinations_for_event(
                                 "Failed to calculate hosts in room for event: %s",
                                 event.event_id,
                             )
-                            return ()
+                            return
 
                     destinations = {
                         d
@@ -327,15 +320,17 @@ async def get_destinations_for_event(
                         )
                     }
 
-                    destinations.discard(self.server_name)
-
                     if send_on_behalf_of is not None:
                         # If we are sending the event on behalf of another server
                         # then it already has the event and there is no reason to
                         # send the event to it.
                         destinations.discard(send_on_behalf_of)
 
+                    logger.debug("Sending %s to %r", event, destinations)
+
                     if destinations:
+                        await self._send_pdu(event, destinations)
+
                         now = self.clock.time_msec()
                         ts = await self.store.get_received_ts(event.event_id)
 
@@ -343,29 +338,24 @@ async def get_destinations_for_event(
                             "federation_sender"
                         ).observe((now - ts) / 1000)
 
-                        return destinations
-                    return ()
-
-                async def get_federatable_events_and_destinations(
-                    events: Iterable[EventBase],
-                ) -> List[Tuple[EventBase, Collection[str]]]:
-                    with Measure(self.clock, "get_destinations_for_events"):
-                        # Fetch federation destinations per event,
-                        # skip if get_destinations_for_event returns an empty collection,
-                        # return list of event->destinations pairs.
-                        return [
-                            (event, dests)
-                            for (event, dests) in [
-                                (event, await get_destinations_for_event(event))
-                                for event in events
-                            ]
-                            if dests
-                        ]
-
-                events_and_dests = await get_federatable_events_and_destinations(events)
-
-                # Send corresponding events to each destination queue
-                await self._distribute_events(events_and_dests)
+                async def handle_room_events(events: Iterable[EventBase]) -> None:
+                    with Measure(self.clock, "handle_room_events"):
+                        for event in events:
+                            await handle_event(event)
+
+                events_by_room = {}  # type: Dict[str, List[EventBase]]
+                for event in events:
+                    events_by_room.setdefault(event.room_id, []).append(event)
+
+                await make_deferred_yieldable(
+                    defer.gatherResults(
+                        [
+                            run_in_background(handle_room_events, evs)
+                            for evs in events_by_room.values()
+                        ],
+                        consumeErrors=True,
+                    )
+                )
 
                 await self.store.update_federation_out_pos("events", next_token)
 
@@ -383,7 +373,7 @@ async def get_federatable_events_and_destinations(
                     events_processed_counter.inc(len(events))
 
                     event_processing_loop_room_count.labels("federation_sender").inc(
-                        len({event.room_id for event in events})
+                        len(events_by_room)
                     )
 
                 event_processing_loop_counter.labels("federation_sender").inc()
@@ -395,53 +385,34 @@ async def get_federatable_events_and_destinations(
         finally:
             self._is_processing = False
 
-    async def _distribute_events(
-        self,
-        events_and_dests: Iterable[Tuple[EventBase, Collection[str]]],
-    ) -> None:
-        """Distribute events to the respective per_destination queues.
-
-        Also persists last-seen per-room stream_ordering to 'destination_rooms'.
-
-        Args:
-            events_and_dests: A list of tuples, which are (event: EventBase, destinations: Collection[str]).
-                              Every event is paired with its intended destinations (in federation).
-        """
-        # Tuples of room_id + destination to their max-seen stream_ordering
-        room_with_dest_stream_ordering = {}  # type: Dict[Tuple[str, str], int]
-
-        # List of events to send to each destination
-        events_by_dest = {}  # type: Dict[str, List[EventBase]]
+    async def _send_pdu(self, pdu: EventBase, destinations: Iterable[str]) -> None:
+        # We loop through all destinations to see whether we already have
+        # a transaction in progress. If we do, stick it in the pending_pdus
+        # table and we'll get back to it later.
 
-        # For each event-destinations pair...
-        for event, destinations in events_and_dests:
+        destinations = set(destinations)
+        destinations.discard(self.server_name)
+        logger.debug("Sending to: %s", str(destinations))
 
-            # (we got this from the database, it's filled)
-            assert event.internal_metadata.stream_ordering
-
-            sent_pdus_destination_dist_total.inc(len(destinations))
-            sent_pdus_destination_dist_count.inc()
+        if not destinations:
+            return
 
-            # ...iterate over those destinations..
-            for destination in destinations:
-                # ...update their stream-ordering...
-                room_with_dest_stream_ordering[(event.room_id, destination)] = max(
-                    event.internal_metadata.stream_ordering,
-                    room_with_dest_stream_ordering.get((event.room_id, destination), 0),
-                )
+        sent_pdus_destination_dist_total.inc(len(destinations))
+        sent_pdus_destination_dist_count.inc()
 
-                # ...and add the event to each destination queue.
-                events_by_dest.setdefault(destination, []).append(event)
+        assert pdu.internal_metadata.stream_ordering
 
-        # Bulk-store destination_rooms stream_ids
-        await self.store.bulk_store_destination_rooms_entries(
-            room_with_dest_stream_ordering
+        # track the fact that we have a PDU for these destinations,
+        # to allow us to perform catch-up later on if the remote is unreachable
+        # for a while.
+        await self.store.store_destination_rooms_entries(
+            destinations,
+            pdu.room_id,
+            pdu.internal_metadata.stream_ordering,
         )
 
-        for destination, pdus in events_by_dest.items():
-            logger.debug("Sending %d pdus to %s", len(pdus), destination)
-
-            self._get_per_destination_queue(destination).send_pdus(pdus)
+        for destination in destinations:
+            self._get_per_destination_queue(destination).send_pdu(pdu)
 
     async def send_read_receipt(self, receipt: ReadReceipt) -> None:
         """Send a RR to any other servers in the room
diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py
index 3bb66bce324..3b053ebcfb0 100644
--- a/synapse/federation/sender/per_destination_queue.py
+++ b/synapse/federation/sender/per_destination_queue.py
@@ -154,22 +154,19 @@ def pending_edu_count(self) -> int:
             + len(self._pending_edus_keyed)
         )
 
-    def send_pdus(self, pdus: Iterable[EventBase]) -> None:
-        """Add PDUs to the queue, and start the transmission loop if necessary
+    def send_pdu(self, pdu: EventBase) -> None:
+        """Add a PDU to the queue, and start the transmission loop if necessary
 
         Args:
-            pdus: pdus to send
+            pdu: pdu to send
         """
         if not self._catching_up or self._last_successful_stream_ordering is None:
             # only enqueue the PDU if we are not catching up (False) or do not
             # yet know if we have anything to catch up (None)
-            self._pending_pdus.extend(pdus)
+            self._pending_pdus.append(pdu)
         else:
-            self._catchup_last_skipped = max(
-                pdu.internal_metadata.stream_ordering
-                for pdu in pdus
-                if pdu.internal_metadata.stream_ordering is not None
-            )
+            assert pdu.internal_metadata.stream_ordering
+            self._catchup_last_skipped = pdu.internal_metadata.stream_ordering
 
         self.attempt_new_transaction()
 
diff --git a/synapse/storage/databases/main/transactions.py b/synapse/storage/databases/main/transactions.py
index b28ca61f806..82335e7a9da 100644
--- a/synapse/storage/databases/main/transactions.py
+++ b/synapse/storage/databases/main/transactions.py
@@ -14,7 +14,7 @@
 
 import logging
 from collections import namedtuple
-from typing import Dict, List, Optional, Tuple
+from typing import Iterable, List, Optional, Tuple
 
 from canonicaljson import encode_canonical_json
 
@@ -295,33 +295,37 @@ def _set_destination_retry_timings_emulated(
                 },
             )
 
-    async def bulk_store_destination_rooms_entries(
-        self, room_and_destination_to_ordering: Dict[Tuple[str, str], int]
-    ):
+    async def store_destination_rooms_entries(
+        self,
+        destinations: Iterable[str],
+        room_id: str,
+        stream_ordering: int,
+    ) -> None:
         """
-        Updates or creates `destination_rooms` entries for a number of events.
+        Updates or creates `destination_rooms` entries in batch for a single event.
 
         Args:
-            room_and_destination_to_ordering: A mapping of (room, destination) -> stream_id
+            destinations: list of destinations
+            room_id: the room_id of the event
+            stream_ordering: the stream_ordering of the event
         """
 
         await self.db_pool.simple_upsert_many(
             table="destinations",
             key_names=("destination",),
-            key_values={(d,) for _, d in room_and_destination_to_ordering.keys()},
+            key_values=[(d,) for d in destinations],
             value_names=[],
             value_values=[],
             desc="store_destination_rooms_entries_dests",
         )
 
+        rows = [(destination, room_id) for destination in destinations]
         await self.db_pool.simple_upsert_many(
             table="destination_rooms",
-            key_names=("room_id", "destination"),
-            key_values=list(room_and_destination_to_ordering.keys()),
+            key_names=("destination", "room_id"),
+            key_values=rows,
             value_names=["stream_ordering"],
-            value_values=[
-                (stream_id,) for stream_id in room_and_destination_to_ordering.values()
-            ],
+            value_values=[(stream_ordering,)] * len(rows),
             desc="store_destination_rooms_entries_rooms",
         )
 

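For readers skimming the restored code above: events are grouped by room, each room's events are handled strictly in order, and the per-room groups are processed concurrently via `run_in_background` plus `defer.gatherResults`. The sketch below shows the same shape in plain asyncio purely as an illustration; it is not the Synapse code and elides destination calculation entirely.

```python
import asyncio
from typing import Dict, List

async def handle_event(event: Dict[str, str]) -> None:
    await asyncio.sleep(0)  # stand-in for computing destinations and queueing the PDU

async def process(events: List[Dict[str, str]]) -> None:
    # Group events by room so ordering is preserved within a room...
    events_by_room: Dict[str, List[Dict[str, str]]] = {}
    for event in events:
        events_by_room.setdefault(event["room_id"], []).append(event)

    async def handle_room_events(evs: List[Dict[str, str]]) -> None:
        for event in evs:
            await handle_event(event)

    # ...while different rooms are processed concurrently.
    await asyncio.gather(*(handle_room_events(evs) for evs in events_by_room.values()))

asyncio.run(process([{"room_id": "!a:example.org"}, {"room_id": "!b:example.org"}]))
```
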
From 7865bc1dfbc89dd1c238945c04ce37c24a585aa5 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 7 May 2021 14:23:02 +0100
Subject: [PATCH 141/278] Always cache 'event_to_prev_state_group'

Fixes regression in send PDU times introduced in #9905.
---
 changelog.d/9950.feature    |  1 +
 synapse/handlers/message.py | 13 +++++++------
 2 files changed, 8 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/9950.feature

diff --git a/changelog.d/9950.feature b/changelog.d/9950.feature
new file mode 100644
index 00000000000..96a0e7f09fb
--- /dev/null
+++ b/changelog.d/9950.feature
@@ -0,0 +1 @@
+Improve performance of sending events for worker-based deployments using Redis.
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index db065ce061f..798043fbf8d 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -1050,6 +1050,13 @@ async def cache_joined_hosts_for_event(
         )
 
         if state_entry.state_group:
+            await self._external_cache.set(
+                "event_to_prev_state_group",
+                event.event_id,
+                state_entry.state_group,
+                expiry_ms=60 * 60 * 1000,
+            )
+
             if state_entry.state_group in self._external_cache_joined_hosts_updates:
                 return
 
@@ -1057,12 +1064,6 @@ async def cache_joined_hosts_for_event(
 
             # Note that the expiry times must be larger than the expiry time in
             # _external_cache_joined_hosts_updates.
-            await self._external_cache.set(
-                "event_to_prev_state_group",
-                event.event_id,
-                state_entry.state_group,
-                expiry_ms=60 * 60 * 1000,
-            )
             await self._external_cache.set(
                 "get_joined_hosts",
                 str(state_entry.state_group),

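Reading the diff: previously the `event_to_prev_state_group` entry was only written after the per-state-group dedupe check, so the early `return` skipped it whenever the joined-hosts entry for that state group was already cached. Moving the `set` call above the check makes the write unconditional. A stub-based sketch of the restored ordering, using hypothetical names rather than the real `MessageHandler`:

```python
import asyncio
from typing import Dict, Set, Tuple

class StubExternalCache:
    """In-memory stand-in for the Redis-backed external cache (illustrative only)."""

    def __init__(self) -> None:
        self.data: Dict[Tuple[str, str], object] = {}

    async def set(self, cache_name: str, key: str, value: object, expiry_ms: int) -> None:
        self.data[(cache_name, key)] = value  # expiry ignored in this stub

async def cache_for_event(
    cache: StubExternalCache, seen_groups: Set[int], event_id: str, state_group: int
) -> None:
    # Always record event -> prev state group, even for a repeat state group.
    await cache.set("event_to_prev_state_group", event_id, state_group, expiry_ms=60 * 60 * 1000)

    if state_group in seen_groups:
        return  # joined hosts for this state group are already cached
    seen_groups.add(state_group)
    # ... would go on to cache "get_joined_hosts" for the state group ...

async def main() -> None:
    cache, seen = StubExternalCache(), set()
    await cache_for_event(cache, seen, "$event1", 42)
    await cache_for_event(cache, seen, "$event2", 42)  # same state group, still cached
    assert ("event_to_prev_state_group", "$event2") in cache.data

asyncio.run(main())
```
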
From 6157f02067b0053da77c6537801fa48b2edf9319 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Tue, 11 May 2021 10:49:45 +0100
Subject: [PATCH 142/278] Revert "improve efficiency of _glob_to_re"

This reverts commit edac710bc0c4dc1cd226d9ffe73a00b42c2b67d8.
---
 synapse/push/push_rule_evaluator.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/synapse/push/push_rule_evaluator.py b/synapse/push/push_rule_evaluator.py
index ae077af5b5c..49ecb38522a 100644
--- a/synapse/push/push_rule_evaluator.py
+++ b/synapse/push/push_rule_evaluator.py
@@ -230,8 +230,7 @@ def _glob_to_re(glob: str, word_boundary: bool) -> Pattern:
     if IS_GLOB.search(glob):
         r = re.escape(glob)
 
-        # replace 1 or more repeats of `\*` with `.*?`
-        r = re.sub(r"(\\\*)+", ".*?", r)
+        r = r.replace(r"\*", ".*?")
         r = r.replace(r"\?", ".")
 
         # handle [abc], [a-z] and [!a-z] style ranges.

From c80e8b98de5697509a7f275f5887849e52c408bd Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Thu, 24 Jun 2021 10:57:39 +0100
Subject: [PATCH 143/278] Tweak changelog

---
 CHANGES.md                | 33 +++++++++++++++++++--------------
 changelog.d/10238.removal |  1 -
 2 files changed, 19 insertions(+), 15 deletions(-)
 delete mode 100644 changelog.d/10238.removal

diff --git a/CHANGES.md b/CHANGES.md
index 3cf1814264f..1fdfeef2666 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,12 +1,17 @@
 Synapse 1.37.0rc1 (2021-06-23)
 ==============================
 
+This release deprecates the current spam checker interface. See the [upgrade notes](https://github.com/matrix-org/synapse/blob/develop/UPGRADE.rst#deprecation-of-the-current-spam-checker-interface) for more information on how to update to the new generic module interface.
+
+This release also removes support for fetching and renewing TLS certificate using the ACME v1 protocol, which has been fully decomissioned by Let's Encrypt on June 1st 2021. Admins previously using this feature should use a [reverse proxy](https://matrix-org.github.io/synapse/develop/reverse_proxy.html) to handle TLS termination, or use an external ACME client (such as [certbot](https://certbot.eff.org/)) to retrieve a certificate and key and provide them to Synapse using the `tls_certificate_path` and `tls_private_key_path` configuration settings.
+
+
 Features
 --------
 
-- Implement "room knocking" as per [MSC2403](https://github.com/matrix-org/matrix-doc/pull/2403). Contributed by Sorunome and anoa. ([\#6739](https://github.com/matrix-org/synapse/issues/6739), [\#9359](https://github.com/matrix-org/synapse/issues/9359), [\#10167](https://github.com/matrix-org/synapse/issues/10167), [\#10212](https://github.com/matrix-org/synapse/issues/10212), [\#10227](https://github.com/matrix-org/synapse/issues/10227))
+- Implement "room knocking" as per [MSC2403](https://github.com/matrix-org/matrix-doc/pull/2403). Contributed by @Sorunome and anoa. ([\#6739](https://github.com/matrix-org/synapse/issues/6739), [\#9359](https://github.com/matrix-org/synapse/issues/9359), [\#10167](https://github.com/matrix-org/synapse/issues/10167), [\#10212](https://github.com/matrix-org/synapse/issues/10212), [\#10227](https://github.com/matrix-org/synapse/issues/10227))
 - Add experimental support for backfilling history into rooms ([MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716)). ([\#9247](https://github.com/matrix-org/synapse/issues/9247))
-- Standardised the module interface. ([\#10062](https://github.com/matrix-org/synapse/issues/10062), [\#10206](https://github.com/matrix-org/synapse/issues/10206))
+- Implement a generic interface for third-party plugin modules. ([\#10062](https://github.com/matrix-org/synapse/issues/10062), [\#10206](https://github.com/matrix-org/synapse/issues/10206))
 - Implement config option `sso.update_profile_information` to sync SSO users' profile information with the identity provider each time they login. Currently only displayname is supported. ([\#10108](https://github.com/matrix-org/synapse/issues/10108))
 - Ensure that errors during startup are written to the logs and the console. ([\#10191](https://github.com/matrix-org/synapse/issues/10191))
 
@@ -15,13 +20,13 @@ Bugfixes
 --------
 
 - Fix a bug introduced in Synapse v1.25.0 that prevented the `ip_range_whitelist` configuration option from working for federation and identity servers. Contributed by @mikure. ([\#10115](https://github.com/matrix-org/synapse/issues/10115))
-- Remove a broken import line in Synapse's admin_cmd worker. Broke in 1.33.0. ([\#10154](https://github.com/matrix-org/synapse/issues/10154))
-- Fix a bug introduced in v1.21.0 which could cause `/sync` to return immediately with an empty response. ([\#10157](https://github.com/matrix-org/synapse/issues/10157), [\#10158](https://github.com/matrix-org/synapse/issues/10158))
-- Fix a minor bug in the response to `/_matrix/client/r0/user/{user}/openid/request_token`. Contributed by @lukaslihotzki. ([\#10175](https://github.com/matrix-org/synapse/issues/10175))
+- Remove a broken import line in Synapse's `admin_cmd` worker. Broke in Synapse v1.33.0. ([\#10154](https://github.com/matrix-org/synapse/issues/10154))
+- Fix a bug introduced in Synapse v1.21.0 which could cause `/sync` to return immediately with an empty response. ([\#10157](https://github.com/matrix-org/synapse/issues/10157), [\#10158](https://github.com/matrix-org/synapse/issues/10158))
+- Fix a minor bug in the response to `/_matrix/client/r0/user/{user}/openid/request_token` causing `expires_in` to be a float instead of an integer. Contributed by @lukaslihotzki. ([\#10175](https://github.com/matrix-org/synapse/issues/10175))
 - Always require users to re-authenticate for dangerous operations: deactivating an account, modifying an account password, and adding 3PIDs. ([\#10184](https://github.com/matrix-org/synapse/issues/10184))
-- Fix a bug introduced in Synpase 1.7.2 where remote server count metrics collection would be incorrectly delayed on startup. Found by @heftig. ([\#10195](https://github.com/matrix-org/synapse/issues/10195))
-- Fix a bug introduced in v1.35.1 where an `allow` key of a `m.room.join_rules` event could be applied for incorrect room versions and configurations. ([\#10208](https://github.com/matrix-org/synapse/issues/10208))
-- Fix performance regression in responding to user key requests over federation. Introduced in v1.34.0rc1. ([\#10221](https://github.com/matrix-org/synapse/issues/10221))
+- Fix a bug introduced in Synpase v1.7.2 where remote server count metrics collection would be incorrectly delayed on startup. Found by @heftig. ([\#10195](https://github.com/matrix-org/synapse/issues/10195))
+- Fix a bug introduced in Synapse v1.35.1 where an `allow` key of a `m.room.join_rules` event could be applied for incorrect room versions and configurations. ([\#10208](https://github.com/matrix-org/synapse/issues/10208))
+- Fix performance regression in responding to user key requests over federation. Introduced in Synapse v1.34.0rc1. ([\#10221](https://github.com/matrix-org/synapse/issues/10221))
 
 
 Improved Documentation
@@ -36,9 +41,9 @@ Improved Documentation
 Deprecations and Removals
 -------------------------
 
-- The current spam checker interface is deprecated in favour of a new generic modules system. See the [upgrade notes](https://github.com/matrix-org/synapse/blob/master/UPGRADE.rst#deprecation-of-the-current-spam-checker-interface) for more information on how to update to the new system. ([\#10062](https://github.com/matrix-org/synapse/issues/10062), [\#10210](https://github.com/matrix-org/synapse/issues/10210))
+- The current spam checker interface is deprecated in favour of a new generic modules system. See the [upgrade notes](https://github.com/matrix-org/synapse/blob/develop/UPGRADE.rst#deprecation-of-the-current-spam-checker-interface) for more information on how to update to the new system. ([\#10062](https://github.com/matrix-org/synapse/issues/10062), [\#10210](https://github.com/matrix-org/synapse/issues/10210), [\#10238](https://github.com/matrix-org/synapse/issues/10238))
 - Stop supporting the unstable spaces prefixes from MSC1772. ([\#10161](https://github.com/matrix-org/synapse/issues/10161))
-- Remove Synapse's support for automatically fetching and renewing certificates using the ACME v1 protocol. This protocol has been fully turned off by Let's Encrypt for existing install on June 1st 2021. Admins previously using this feature should use a [reverse proxy](https://matrix-org.github.io/synapse/develop/reverse_proxy.html) to handle TLS termination, or use an external ACME client (such as [certbot](https://certbot.eff.org/)) to retrieve a certificate and key and provide them to Synapse using the `tls_certificate_path` and `tls_private_key_path` configuration settings. ([\#10194](https://github.com/matrix-org/synapse/issues/10194))
+- Remove Synapse's support for automatically fetching and renewing certificates using the ACME v1 protocol. This protocol has been fully turned off by Let's Encrypt for existing installations on June 1st 2021. Admins previously using this feature should use a [reverse proxy](https://matrix-org.github.io/synapse/develop/reverse_proxy.html) to handle TLS termination, or use an external ACME client (such as [certbot](https://certbot.eff.org/)) to retrieve a certificate and key and provide them to Synapse using the `tls_certificate_path` and `tls_private_key_path` configuration settings. ([\#10194](https://github.com/matrix-org/synapse/issues/10194))
 
 
 Internal Changes
@@ -47,21 +52,21 @@ Internal Changes
 - Update the database schema versioning to support gradual migration away from legacy tables. ([\#9933](https://github.com/matrix-org/synapse/issues/9933))
 - Add type hints to the federation servlets. ([\#10080](https://github.com/matrix-org/synapse/issues/10080))
 - Improve OpenTracing for event persistence. ([\#10134](https://github.com/matrix-org/synapse/issues/10134), [\#10193](https://github.com/matrix-org/synapse/issues/10193))
-- Clean up the interface for injecting opentracing over HTTP. ([\#10143](https://github.com/matrix-org/synapse/issues/10143))
+- Clean up the interface for injecting OpenTracing over HTTP. ([\#10143](https://github.com/matrix-org/synapse/issues/10143))
 - Limit the number of in-flight `/keys/query` requests from a single device. ([\#10144](https://github.com/matrix-org/synapse/issues/10144))
 - Refactor EventPersistenceQueue. ([\#10145](https://github.com/matrix-org/synapse/issues/10145))
 - Document `SYNAPSE_TEST_LOG_LEVEL` to see the logger output when running tests. ([\#10148](https://github.com/matrix-org/synapse/issues/10148))
 - Update the Complement build tags in GitHub Actions to test currently experimental features. ([\#10155](https://github.com/matrix-org/synapse/issues/10155))
-- Add `synapse_federation_soft_failed_events_total` metric to track how often events are soft failed. ([\#10156](https://github.com/matrix-org/synapse/issues/10156))
+- Add a `synapse_federation_soft_failed_events_total` metric to track how often events are soft failed. ([\#10156](https://github.com/matrix-org/synapse/issues/10156))
 - Fetch the corresponding complement branch when performing CI. ([\#10160](https://github.com/matrix-org/synapse/issues/10160))
 - Add some developer documentation about boolean columns in database schemas. ([\#10164](https://github.com/matrix-org/synapse/issues/10164))
 - Add extra logging fields to better debug where events are being soft failed. ([\#10168](https://github.com/matrix-org/synapse/issues/10168))
 - Add debug logging for when we enter and exit `Measure` blocks. ([\#10183](https://github.com/matrix-org/synapse/issues/10183))
 - Improve comments in structured logging code. ([\#10188](https://github.com/matrix-org/synapse/issues/10188))
-- Update MSC3083 support for modifications in the MSC. ([\#10189](https://github.com/matrix-org/synapse/issues/10189))
+- Update [MSC3083](https://github.com/matrix-org/matrix-doc/pull/3083) support with modifications from the MSC. ([\#10189](https://github.com/matrix-org/synapse/issues/10189))
 - Remove redundant DNS lookup limiter. ([\#10190](https://github.com/matrix-org/synapse/issues/10190))
 - Upgrade `black` linting tool to 21.6b0. ([\#10197](https://github.com/matrix-org/synapse/issues/10197))
-- Expose opentracing trace id in response headers. ([\#10199](https://github.com/matrix-org/synapse/issues/10199))
+- Expose OpenTracing trace id in response headers. ([\#10199](https://github.com/matrix-org/synapse/issues/10199))
 
 
 Synapse 1.36.0 (2021-06-15)
diff --git a/changelog.d/10238.removal b/changelog.d/10238.removal
deleted file mode 100644
index 5fb7bfb47e7..00000000000
--- a/changelog.d/10238.removal
+++ /dev/null
@@ -1 +0,0 @@
-The current spam checker interface is deprecated in favour of a new generic modules system. See the [upgrade notes](https://github.com/matrix-org/synapse/blob/master/UPGRADE.rst#deprecation-of-the-current-spam-checker-interface) for more information on how to update to the new system.

From 5575b5cb43125d334f510027f6a3482d0b15bd4c Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 28 Jun 2021 14:12:48 +0100
Subject: [PATCH 144/278] Don't send catchup transactions for matrix hq

This is because there are problems with HQ at the moment.
---
 synapse/storage/databases/main/transactions.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/synapse/storage/databases/main/transactions.py b/synapse/storage/databases/main/transactions.py
index d211c423b2c..67655fc2150 100644
--- a/synapse/storage/databases/main/transactions.py
+++ b/synapse/storage/databases/main/transactions.py
@@ -411,6 +411,7 @@ def _get_catch_up_room_event_ids_txn(
                  JOIN events USING (stream_ordering)
                 WHERE destination = ?
                   AND stream_ordering > ?
+                  AND room_id != '!OGEhHVWSdvArJzumhm:matrix.org'
                 ORDER BY stream_ordering
                 LIMIT 50
             """

From f271b32beaf5515eb798708f4a205f768260d8c2 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 28 Jun 2021 14:17:08 +0100
Subject: [PATCH 145/278] don't send out events for matrix.org

---
 synapse/federation/sender/per_destination_queue.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py
index 3a2efd56eea..a44cc781c84 100644
--- a/synapse/federation/sender/per_destination_queue.py
+++ b/synapse/federation/sender/per_destination_queue.py
@@ -287,6 +287,13 @@ async def _transaction_transmission_loop(self) -> None:
                             len(pending_pdus),
                         )
 
+                        # Filter out HQ traffic for now
+                        pending_pdus = [
+                            pdu
+                            for pdu in pending_pdus
+                            if pdu.room_id != "!OGEhHVWSdvArJzumhm:matrix.org"
+                        ]
+
                     await self._transaction_manager.send_new_transaction(
                         self._destination, pending_pdus, pending_edus
                     )

From ffe17e47cef75fe3d2a5f4c3fbf9c65f6caee1a8 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 28 Jun 2021 14:29:27 +0100
Subject: [PATCH 146/278] Fix SQL

---
 synapse/storage/databases/main/transactions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/storage/databases/main/transactions.py b/synapse/storage/databases/main/transactions.py
index 67655fc2150..b3250084c53 100644
--- a/synapse/storage/databases/main/transactions.py
+++ b/synapse/storage/databases/main/transactions.py
@@ -411,7 +411,7 @@ def _get_catch_up_room_event_ids_txn(
                  JOIN events USING (stream_ordering)
                 WHERE destination = ?
                   AND stream_ordering > ?
-                  AND room_id != '!OGEhHVWSdvArJzumhm:matrix.org'
+                  AND destination_rooms.room_id != '!OGEhHVWSdvArJzumhm:matrix.org'
                 ORDER BY stream_ordering
                 LIMIT 50
             """

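The one-word commit message leaves the reason implicit: after `JOIN events USING (stream_ordering)`, both tables expose a `room_id` column, so the unqualified reference added two patches back is ambiguous and the statement fails to prepare. A minimal reproduction against a simplified, made-up schema:

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.executescript(
    """
    CREATE TABLE destination_rooms (destination TEXT, room_id TEXT, stream_ordering INTEGER);
    CREATE TABLE events (event_id TEXT, room_id TEXT, stream_ordering INTEGER);
    """
)

base = """
    SELECT event_id FROM destination_rooms
     JOIN events USING (stream_ordering)
    WHERE destination = ? AND stream_ordering > ? AND {room_filter}
    ORDER BY stream_ordering LIMIT 50
"""

try:
    conn.execute(base.format(room_filter="room_id != ?"), ("example.org", 0, "!room:x"))
except sqlite3.OperationalError as exc:
    print(exc)  # ambiguous column name: room_id

# Qualifying the column, as the patch does, resolves the ambiguity.
conn.execute(
    base.format(room_filter="destination_rooms.room_id != ?"), ("example.org", 0, "!room:x")
)
```
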
From b4bdab8e52e3a236aa5869956e1f35740250f445 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Tue, 29 Jun 2021 16:13:09 +0100
Subject: [PATCH 147/278] Revert "Don't send catchup transactions for matrix
 hq"

This reverts commits 5575b5cb43125d334f510027f6a3482d0b15bd4c,
f271b32beaf5515eb798708f4a205f768260d8c2,
ffe17e47cef75fe3d2a5f4c3fbf9c65f6caee1a8
---
 synapse/federation/sender/per_destination_queue.py | 7 -------
 synapse/storage/databases/main/transactions.py     | 1 -
 2 files changed, 8 deletions(-)

diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py
index a44cc781c84..3a2efd56eea 100644
--- a/synapse/federation/sender/per_destination_queue.py
+++ b/synapse/federation/sender/per_destination_queue.py
@@ -287,13 +287,6 @@ async def _transaction_transmission_loop(self) -> None:
                             len(pending_pdus),
                         )
 
-                        # Filter out HQ traffic for now
-                        pending_pdus = [
-                            pdu
-                            for pdu in pending_pdus
-                            if pdu.room_id != "!OGEhHVWSdvArJzumhm:matrix.org"
-                        ]
-
                     await self._transaction_manager.send_new_transaction(
                         self._destination, pending_pdus, pending_edus
                     )
diff --git a/synapse/storage/databases/main/transactions.py b/synapse/storage/databases/main/transactions.py
index b3250084c53..d211c423b2c 100644
--- a/synapse/storage/databases/main/transactions.py
+++ b/synapse/storage/databases/main/transactions.py
@@ -411,7 +411,6 @@ def _get_catch_up_room_event_ids_txn(
                  JOIN events USING (stream_ordering)
                 WHERE destination = ?
                   AND stream_ordering > ?
-                  AND destination_rooms.room_id != '!OGEhHVWSdvArJzumhm:matrix.org'
                 ORDER BY stream_ordering
                 LIMIT 50
             """

From cfddd43bfb8e2f00e5eed1a934ad8b7467052d22 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Tue, 29 Jun 2021 20:56:46 +0100
Subject: [PATCH 148/278] bump background update rate

---
 synapse/storage/background_updates.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index 142787fdfd1..c1f4d99e192 100644
--- a/synapse/storage/background_updates.py
+++ b/synapse/storage/background_updates.py
@@ -84,8 +84,9 @@ class BackgroundUpdater:
 
     MINIMUM_BACKGROUND_BATCH_SIZE = 100
     DEFAULT_BACKGROUND_BATCH_SIZE = 100
-    BACKGROUND_UPDATE_INTERVAL_MS = 1000
-    BACKGROUND_UPDATE_DURATION_MS = 100
+    # temporarily increased to make stream_ordering go faster: rv 2021/06/29
+    BACKGROUND_UPDATE_INTERVAL_MS = 10
+    BACKGROUND_UPDATE_DURATION_MS = 1000
 
     def __init__(self, hs: "HomeServer", database: "DatabasePool"):
         self._clock = hs.get_clock()

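Rough arithmetic on the two constants, assuming each iteration aims for `BACKGROUND_UPDATE_DURATION_MS` of work followed by a `BACKGROUND_UPDATE_INTERVAL_MS` pause: the defaults amount to roughly a 9-10% duty cycle, while the bumped values keep the updater busy nearly all the time.

```python
# Back-of-the-envelope duty cycle under the "work then sleep" assumption above.
def duty_cycle(duration_ms: int, interval_ms: int) -> float:
    return duration_ms / (duration_ms + interval_ms)

print(f"default: {duty_cycle(100, 1000):.0%}")  # ~9%
print(f"bumped:  {duty_cycle(1000, 10):.0%}")   # ~99%
```
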
From 40e92b224c827500bb9c9400d2896e572aa61ea6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=A0imon=20Brandner?= <simon.bra.ag@gmail.com>
Date: Mon, 16 Aug 2021 13:22:38 +0200
Subject: [PATCH 149/278] Handle string read receipt data (#10606)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Handle string read receipt data

Signed-off-by: Šimon Brandner <simon.bra.ag@gmail.com>

* Test that we handle string read receipt data

Signed-off-by: Šimon Brandner <simon.bra.ag@gmail.com>

* Add changelog for #10606

Signed-off-by: Šimon Brandner <simon.bra.ag@gmail.com>

* Add docs

Signed-off-by: Šimon Brandner <simon.bra.ag@gmail.com>

* Ignore malformed RRs

Signed-off-by: Šimon Brandner <simon.bra.ag@gmail.com>

* Only surround hidden = ...

Signed-off-by: Šimon Brandner <simon.bra.ag@gmail.com>

* Remove unnecessary argument

Signed-off-by: Šimon Brandner <simon.bra.ag@gmail.com>

* Update changelog.d/10606.bugfix

Co-authored-by: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
---
 changelog.d/10606.bugfix        |  1 +
 synapse/handlers/receipts.py    |  9 ++++++++-
 tests/handlers/test_receipts.py | 23 +++++++++++++++++++++++
 3 files changed, 32 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/10606.bugfix

diff --git a/changelog.d/10606.bugfix b/changelog.d/10606.bugfix
new file mode 100644
index 00000000000..bab9fd2a612
--- /dev/null
+++ b/changelog.d/10606.bugfix
@@ -0,0 +1 @@
+Fix errors on /sync when read receipt data is a string. Only affects homeservers with the experimental flag for [MSC2285](https://github.com/matrix-org/matrix-doc/pull/2285) enabled. Contributed by @SimonBrandner.
diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py
index b9085bbccb3..40e0c7bb1b5 100644
--- a/synapse/handlers/receipts.py
+++ b/synapse/handlers/receipts.py
@@ -187,7 +187,14 @@ def filter_out_hidden(events: List[JsonDict], user_id: str) -> List[JsonDict]:
 
                 new_users = {}
                 for rr_user_id, user_rr in m_read.items():
-                    hidden = user_rr.get("hidden", None)
+                    try:
+                        hidden = user_rr.get("hidden")
+                    except AttributeError:
+                        # Due to https://github.com/matrix-org/synapse/issues/10376
+                        # there are cases where user_rr is a string, in those cases
+                        # we just ignore the read receipt
+                        continue
+
                     if hidden is not True or rr_user_id == user_id:
                         new_users[rr_user_id] = user_rr.copy()
                         # If hidden has a value replace hidden with the correct prefixed key
diff --git a/tests/handlers/test_receipts.py b/tests/handlers/test_receipts.py
index 93a9a084b24..732a12c9bd0 100644
--- a/tests/handlers/test_receipts.py
+++ b/tests/handlers/test_receipts.py
@@ -286,6 +286,29 @@ def test_filters_out_receipt_event_with_only_hidden_receipt_and_ignores_rest(sel
             ],
         )
 
+    def test_handles_string_data(self):
+        """
+        Tests that an invalid shape for read-receipts is handled.
+        Context: https://github.com/matrix-org/synapse/issues/10603
+        """
+
+        self._test_filters_hidden(
+            [
+                {
+                    "content": {
+                        "$14356419edgd14394fHBLK:matrix.org": {
+                            "m.read": {
+                                "@rikj:jki.re": "string",
+                            }
+                        },
+                    },
+                    "room_id": "!jEsUZKDJdhlrceRyVU:example.org",
+                    "type": "m.receipt",
+                },
+            ],
+            [],
+        )
+
     def _test_filters_hidden(
         self, events: List[JsonDict], expected_output: List[JsonDict]
     ):

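The failure mode in isolation (the receipt content below is made up, mirroring the new test): a well-formed `m.read` entry maps each user ID to a dict, but issue #10376 produced plain strings, and calling `.get` on a string raises `AttributeError`, which the handler now treats as "skip this receipt".

```python
m_read = {
    "@alice:example.org": {"ts": 1436451550453, "hidden": True},  # normal shape
    "@rikj:jki.re": "string",                                     # malformed, as in issue 10376
}

for rr_user_id, user_rr in m_read.items():
    try:
        hidden = user_rr.get("hidden")
    except AttributeError:
        # str has no .get(); ignore the malformed receipt rather than failing /sync
        continue
    print(rr_user_id, hidden)
```
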
From ac646fed06d31b151a902b1f25f6a82a2d719ffd Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Thu, 26 Aug 2021 09:45:19 +0100
Subject: [PATCH 150/278] Remove some redundant patches from the hotfixes
 branch

---
 synapse/handlers/room_list.py         | 1 -
 synapse/handlers/sync.py              | 1 -
 synapse/storage/background_updates.py | 5 ++---
 3 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py
index 8d18e07aa68..6d433fad41b 100644
--- a/synapse/handlers/room_list.py
+++ b/synapse/handlers/room_list.py
@@ -47,7 +47,6 @@ class RoomListHandler(BaseHandler):
     def __init__(self, hs: "HomeServer"):
         super().__init__(hs)
         self.enable_room_list_search = hs.config.enable_room_list_search
-
         self.response_cache: ResponseCache[
             Tuple[Optional[int], Optional[str], Optional[ThirdPartyInstanceID]]
         ] = ResponseCache(hs.get_clock(), "room_list")
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 53ae539d748..590642f510f 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -61,7 +61,6 @@
 # Debug logger for https://github.com/matrix-org/synapse/issues/4422
 issue4422_logger = logging.getLogger("synapse.handler.sync.4422_debug")
 
-SYNC_RESPONSE_CACHE_MS = 2 * 60 * 1000
 
 # Counts the number of times we returned a non-empty sync. `type` is one of
 # "initial_sync", "full_state_sync" or "incremental_sync", `lazy_loaded` is
diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index 7f975a8f162..82b31d24f1f 100644
--- a/synapse/storage/background_updates.py
+++ b/synapse/storage/background_updates.py
@@ -84,9 +84,8 @@ class BackgroundUpdater:
 
     MINIMUM_BACKGROUND_BATCH_SIZE = 100
     DEFAULT_BACKGROUND_BATCH_SIZE = 100
-    # temporarily increased to make stream_ordering go faster: rv 2021/06/29
-    BACKGROUND_UPDATE_INTERVAL_MS = 10
-    BACKGROUND_UPDATE_DURATION_MS = 1000
+    BACKGROUND_UPDATE_INTERVAL_MS = 1000
+    BACKGROUND_UPDATE_DURATION_MS = 100
 
     def __init__(self, hs: "HomeServer", database: "DatabasePool"):
         self._clock = hs.get_clock()

From 4d03ad5255c8219b39b6f1d3e3cd032a218adb60 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Mon, 6 Sep 2021 15:57:57 +0100
Subject: [PATCH 151/278] Expand on why users should read upgrade notes

---
 CHANGES.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/CHANGES.md b/CHANGES.md
index 64c30eed104..67d649a4dd7 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,7 +1,10 @@
 Synapse 1.42.0rc2 (2021-09-06)
 ==============================
 
-Server administrators are reminded to read [the upgrade notes](docs/upgrade.md#upgrading-to-v1420).
+This version of Synapse removes deprecated room-management admin APIs and out-of-date
+email pushers, and improves error handling for fallback templates for user-interactive
+authentication. For more information on these points, server administrators are
+encouraged to read [the upgrade notes](docs/upgrade.md#upgrading-to-v1420).
 
 Features
 --------

From dffdda5f89402f4891774d3bc0ea9b6d299afc83 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Fri, 10 Sep 2021 10:42:14 +0100
Subject: [PATCH 152/278] Revert "Expand on why users should read upgrade
 notes"

This reverts commit 4d03ad5255c8219b39b6f1d3e3cd032a218adb60.

This was committed to the wrong branch - it has been superseded by ca3cb1e039
on the release branches.
---
 CHANGES.md | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index 67d649a4dd7..64c30eed104 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,10 +1,7 @@
 Synapse 1.42.0rc2 (2021-09-06)
 ==============================
 
-This version of Synapse removes deprecated room-management admin APIs and out-of-date
-email pushers, and improves error handling for fallback templates for user-interactive
-authentication. For more information on these points, server administrators are
-encouraged to read [the upgrade notes](docs/upgrade.md#upgrading-to-v1420).
+Server administrators are reminded to read [the upgrade notes](docs/upgrade.md#upgrading-to-v1420).
 
 Features
 --------

From c9fb203ce0fa8705b33d18f86fc77549c1e1522b Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Wed, 15 Sep 2021 14:06:21 +0100
Subject: [PATCH 153/278] Allow LruCaches to opt out of time-based expiry

---
 synapse/util/caches/deferred_cache.py |  2 ++
 synapse/util/caches/descriptors.py    |  5 +++++
 synapse/util/caches/lrucache.py       | 16 +++++++++++++---
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/synapse/util/caches/deferred_cache.py b/synapse/util/caches/deferred_cache.py
index f05590da0d5..6262efe0723 100644
--- a/synapse/util/caches/deferred_cache.py
+++ b/synapse/util/caches/deferred_cache.py
@@ -73,6 +73,7 @@ def __init__(
         tree: bool = False,
         iterable: bool = False,
         apply_cache_factor_from_config: bool = True,
+        prune_unread_entries: bool = True,
     ):
         """
         Args:
@@ -105,6 +106,7 @@ def metrics_cb() -> None:
             size_callback=(lambda d: len(d) or 1) if iterable else None,
             metrics_collection_callback=metrics_cb,
             apply_cache_factor_from_config=apply_cache_factor_from_config,
+            prune_unread_entries=prune_unread_entries,
         )
 
         self.thread: Optional[threading.Thread] = None
diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py
index 1ca31e41ac6..b9dcca17f1a 100644
--- a/synapse/util/caches/descriptors.py
+++ b/synapse/util/caches/descriptors.py
@@ -258,6 +258,7 @@ def __init__(
         tree=False,
         cache_context=False,
         iterable=False,
+        prune_unread_entries: bool = True,
     ):
         super().__init__(orig, num_args=num_args, cache_context=cache_context)
 
@@ -269,6 +270,7 @@ def __init__(
         self.max_entries = max_entries
         self.tree = tree
         self.iterable = iterable
+        self.prune_unread_entries = prune_unread_entries
 
     def __get__(self, obj, owner):
         cache: DeferredCache[CacheKey, Any] = DeferredCache(
@@ -276,6 +278,7 @@ def __get__(self, obj, owner):
             max_entries=self.max_entries,
             tree=self.tree,
             iterable=self.iterable,
+            prune_unread_entries=self.prune_unread_entries,
         )
 
         get_cache_key = self.cache_key_builder
@@ -507,6 +510,7 @@ def cached(
     tree: bool = False,
     cache_context: bool = False,
     iterable: bool = False,
+    prune_unread_entries: bool = True,
 ) -> Callable[[F], _CachedFunction[F]]:
     func = lambda orig: DeferredCacheDescriptor(
         orig,
@@ -515,6 +519,7 @@ def cached(
         tree=tree,
         cache_context=cache_context,
         iterable=iterable,
+        prune_unread_entries=prune_unread_entries,
     )
 
     return cast(Callable[[F], _CachedFunction[F]], func)
diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py
index 39dce9dd416..17cb98ff0b9 100644
--- a/synapse/util/caches/lrucache.py
+++ b/synapse/util/caches/lrucache.py
@@ -202,10 +202,11 @@ def __init__(
         cache: "weakref.ReferenceType[LruCache]",
         clock: Clock,
         callbacks: Collection[Callable[[], None]] = (),
+        prune_unread_entries: bool = True,
     ):
         self._list_node = ListNode.insert_after(self, root)
-        self._global_list_node = None
-        if USE_GLOBAL_LIST:
+        self._global_list_node: Optional[_TimedListNode] = None
+        if USE_GLOBAL_LIST and prune_unread_entries:
             self._global_list_node = _TimedListNode.insert_after(self, GLOBAL_ROOT)
             self._global_list_node.update_last_access(clock)
 
@@ -314,6 +315,7 @@ def __init__(
         metrics_collection_callback: Optional[Callable[[], None]] = None,
         apply_cache_factor_from_config: bool = True,
         clock: Optional[Clock] = None,
+        prune_unread_entries: bool = True,
     ):
         """
         Args:
@@ -427,7 +429,15 @@ def cache_len():
         self.len = synchronized(cache_len)
 
         def add_node(key, value, callbacks: Collection[Callable[[], None]] = ()):
-            node = _Node(list_root, key, value, weak_ref_to_self, real_clock, callbacks)
+            node = _Node(
+                list_root,
+                key,
+                value,
+                weak_ref_to_self,
+                real_clock,
+                callbacks,
+                prune_unread_entries,
+            )
             cache[key] = node
 
             if size_callback:

From 361ffb8f0aa1cc9c5d8a0e5f0e46cbaf3b15340e Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Wed, 15 Sep 2021 14:18:42 +0100
Subject: [PATCH 154/278] Don't expire `get_users_who_share_room` & friends

---
 synapse/storage/databases/main/roommember.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py
index 9beeb96aa98..a4ec6bc3289 100644
--- a/synapse/storage/databases/main/roommember.py
+++ b/synapse/storage/databases/main/roommember.py
@@ -162,7 +162,7 @@ def _check_safe_current_state_events_membership_updated_txn(self, txn):
                 self._check_safe_current_state_events_membership_updated_txn,
             )
 
-    @cached(max_entries=100000, iterable=True)
+    @cached(max_entries=100000, iterable=True, prune_unread_entries=False)
     async def get_users_in_room(self, room_id: str) -> List[str]:
         return await self.db_pool.runInteraction(
             "get_users_in_room", self.get_users_in_room_txn, room_id
@@ -439,7 +439,7 @@ async def get_local_current_membership_for_user_in_room(
 
         return results_dict.get("membership"), results_dict.get("event_id")
 
-    @cached(max_entries=500000, iterable=True)
+    @cached(max_entries=500000, iterable=True, prune_unread_entries=False)
     async def get_rooms_for_user_with_stream_ordering(
         self, user_id: str
     ) -> FrozenSet[GetRoomsForUserWithStreamOrdering]:
@@ -544,7 +544,12 @@ async def get_rooms_for_user(
         )
         return frozenset(r.room_id for r in rooms)
 
-    @cached(max_entries=500000, cache_context=True, iterable=True)
+    @cached(
+        max_entries=500000,
+        cache_context=True,
+        iterable=True,
+        prune_unread_entries=False,
+    )
     async def get_users_who_share_room_with_user(
         self, user_id: str, cache_context: _CacheContext
     ) -> Set[str]:

From 622785848286d614fff3fd2e2ea2d7d522bd739c Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Wed, 15 Sep 2021 14:40:26 +0100
Subject: [PATCH 155/278] Changelog

---
 changelog.d/10826.misc | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 changelog.d/10826.misc

diff --git a/changelog.d/10826.misc b/changelog.d/10826.misc
new file mode 100644
index 00000000000..53e56fc362f
--- /dev/null
+++ b/changelog.d/10826.misc
@@ -0,0 +1,2 @@
+Opt out of cache expiry for `get_users_who_share_room_with_user`, to hopefully improve `/sync` performance when you
+haven't synced recently.

From 2bb023ba2b4fa607effbbe247791551263081b72 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 27 Oct 2021 11:56:36 +0100
Subject: [PATCH 156/278] Don't seqscan event_json due to relates_to_id

---
 synapse/storage/databases/main/events_bg_updates.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py
index f92d8248765..ae3a8a63e42 100644
--- a/synapse/storage/databases/main/events_bg_updates.py
+++ b/synapse/storage/databases/main/events_bg_updates.py
@@ -1108,7 +1108,7 @@ def _event_thread_relation_txn(txn: LoggingTransaction) -> int:
                 """
                 SELECT event_id, json FROM event_json
                 LEFT JOIN event_relations USING (event_id)
-                WHERE event_id > ? AND relates_to_id IS NULL
+                WHERE event_id > ? AND event_relations.event_id IS NULL
                 ORDER BY event_id LIMIT ?
                 """,
                 (last_event_id, batch_size),

From 098e964bbccbe7454a7153369a8566f707c42665 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 23 Nov 2021 10:14:22 +0000
Subject: [PATCH 157/278] Prevent the media store from writing outside of the
 configured directory

And the associated changelog/release process updates too.

Applied by patch from the security fork.
---
 CHANGES.md                           |  23 +++
 debian/changelog                     |   6 +
 synapse/__init__.py                  |   2 +-
 synapse/rest/media/v1/_base.py       |  18 +-
 synapse/rest/media/v1/filepath.py    | 241 +++++++++++++++++++++-----
 synapse/util/stringutils.py          |  21 ++-
 tests/http/test_endpoint.py          |   3 +
 tests/rest/media/v1/test_filepath.py | 250 +++++++++++++++++++++++++++
 8 files changed, 513 insertions(+), 51 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index a435d9c5925..fde8d7f81a9 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,3 +1,26 @@
+Synapse 1.47.1 (2021-11-23)
+===========================
+
+This release fixes a security issue in the media store, affecting all prior releases of Synapse. Server administrators are encouraged to update Synapse as soon as possible. We are not aware of these vulnerabilities being exploited in the wild.
+
+Server administrators who are unable to update Synapse may use the workarounds described in the linked GitHub Security Advisory below.
+
+Security advisory
+-----------------
+
+The following issue is fixed in 1.47.1.
+
+- **[GHSA-3hfw-x7gx-437c](https://github.com/matrix-org/synapse/security/advisories/GHSA-3hfw-x7gx-437c) / [CVE-2021-41281](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41281): Path traversal when downloading remote media.**
+
+  Synapse instances with the media repository enabled can be tricked into downloading a file from a remote server into an arbitrary directory, potentially outside the media store directory.
+
+  The last two directories and file name of the path are chosen randomly by Synapse and cannot be controlled by an attacker, which limits the impact.
+
+  Homeservers with the media repository disabled are unaffected. Homeservers configured with a federation whitelist are also unaffected.
+
+  Fixed by [91f2bd090](https://github.com/matrix-org/synapse/commit/91f2bd090).
+
+
 Synapse 1.47.0 (2021-11-17)
 ===========================
 
diff --git a/debian/changelog b/debian/changelog
index ba75d0b2517..35c9063388e 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+matrix-synapse-py3 (1.47.1) stable; urgency=medium
+
+  * New synapse release 1.47.1.
+
+ -- Synapse Packaging team <packages@matrix.org>  Fri, 19 Nov 2021 13:44:32 +0000
+
 matrix-synapse-py3 (1.47.0) stable; urgency=medium
 
   * New synapse release 1.47.0.
diff --git a/synapse/__init__.py b/synapse/__init__.py
index aa964afb5e6..48ac38aec66 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -47,7 +47,7 @@
 except ImportError:
     pass
 
-__version__ = "1.47.0"
+__version__ = "1.47.1"
 
 if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)):
     # We import here so that we don't have to install a bunch of deps when
diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py
index 014fa893d6c..9b40fd8a6c2 100644
--- a/synapse/rest/media/v1/_base.py
+++ b/synapse/rest/media/v1/_base.py
@@ -29,7 +29,7 @@
 from synapse.http.server import finish_request, respond_with_json
 from synapse.http.site import SynapseRequest
 from synapse.logging.context import make_deferred_yieldable
-from synapse.util.stringutils import is_ascii
+from synapse.util.stringutils import is_ascii, parse_and_validate_server_name
 
 logger = logging.getLogger(__name__)
 
@@ -51,6 +51,19 @@
 
 
 def parse_media_id(request: Request) -> Tuple[str, str, Optional[str]]:
+    """Parses the server name, media ID and optional file name from the request URI
+
+    Also performs some rough validation on the server name.
+
+    Args:
+        request: The `Request`.
+
+    Returns:
+        A tuple containing the parsed server name, media ID and optional file name.
+
+    Raises:
+        SynapseError(404): if parsing or validation fail for any reason
+    """
     try:
         # The type on postpath seems incorrect in Twisted 21.2.0.
         postpath: List[bytes] = request.postpath  # type: ignore
@@ -62,6 +75,9 @@ def parse_media_id(request: Request) -> Tuple[str, str, Optional[str]]:
         server_name = server_name_bytes.decode("utf-8")
         media_id = media_id_bytes.decode("utf8")
 
+        # Validate the server name, raising if invalid
+        parse_and_validate_server_name(server_name)
+
         file_name = None
         if len(postpath) > 2:
             try:
diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py
index bec77088ee7..c0e15c65139 100644
--- a/synapse/rest/media/v1/filepath.py
+++ b/synapse/rest/media/v1/filepath.py
@@ -16,7 +16,8 @@
 import functools
 import os
 import re
-from typing import Any, Callable, List, TypeVar, cast
+import string
+from typing import Any, Callable, List, TypeVar, Union, cast
 
 NEW_FORMAT_ID_RE = re.compile(r"^\d\d\d\d-\d\d-\d\d")
 
@@ -37,6 +38,85 @@ def _wrapped(self: "MediaFilePaths", *args: Any, **kwargs: Any) -> str:
     return cast(F, _wrapped)
 
 
+GetPathMethod = TypeVar(
+    "GetPathMethod", bound=Union[Callable[..., str], Callable[..., List[str]]]
+)
+
+
+def _wrap_with_jail_check(func: GetPathMethod) -> GetPathMethod:
+    """Wraps a path-returning method to check that the returned path(s) do not escape
+    the media store directory.
+
+    The check is not expected to ever fail, unless `func` is missing a call to
+    `_validate_path_component`, or `_validate_path_component` is buggy.
+
+    Args:
+        func: The `MediaFilePaths` method to wrap. The method may return either a single
+            path, or a list of paths. Returned paths may be either absolute or relative.
+
+    Returns:
+        The method, wrapped with a check to ensure that the returned path(s) lie within
+        the media store directory. Raises a `ValueError` if the check fails.
+    """
+
+    @functools.wraps(func)
+    def _wrapped(
+        self: "MediaFilePaths", *args: Any, **kwargs: Any
+    ) -> Union[str, List[str]]:
+        path_or_paths = func(self, *args, **kwargs)
+
+        if isinstance(path_or_paths, list):
+            paths_to_check = path_or_paths
+        else:
+            paths_to_check = [path_or_paths]
+
+        for path in paths_to_check:
+            # path may be an absolute or relative path, depending on the method being
+            # wrapped. When "appending" an absolute path, `os.path.join` discards the
+            # previous path, which is desired here.
+            normalized_path = os.path.normpath(os.path.join(self.real_base_path, path))
+            if (
+                os.path.commonpath([normalized_path, self.real_base_path])
+                != self.real_base_path
+            ):
+                raise ValueError(f"Invalid media store path: {path!r}")
+
+        return path_or_paths
+
+    return cast(GetPathMethod, _wrapped)
+
+
+ALLOWED_CHARACTERS = set(
+    string.ascii_letters
+    + string.digits
+    + "_-"
+    + ".[]:"  # Domain names, IPv6 addresses and ports in server names
+)
+FORBIDDEN_NAMES = {
+    "",
+    os.path.curdir,  # "." for the current platform
+    os.path.pardir,  # ".." for the current platform
+}
+
+
+def _validate_path_component(name: str) -> str:
+    """Checks that the given string can be safely used as a path component
+
+    Args:
+        name: The path component to check.
+
+    Returns:
+        The path component if valid.
+
+    Raises:
+        ValueError: If `name` cannot be safely used as a path component.
+    """
+    if not ALLOWED_CHARACTERS.issuperset(name) or name in FORBIDDEN_NAMES:
+        raise ValueError(f"Invalid path component: {name!r}")
+
+    return name
+
+
 class MediaFilePaths:
     """Describes where files are stored on disk.
 
@@ -48,22 +128,46 @@ class MediaFilePaths:
     def __init__(self, primary_base_path: str):
         self.base_path = primary_base_path
 
+        # The media store directory, with all symlinks resolved.
+        self.real_base_path = os.path.realpath(primary_base_path)
+
+        # Refuse to initialize if paths cannot be validated correctly for the current
+        # platform.
+        assert os.path.sep not in ALLOWED_CHARACTERS
+        assert os.path.altsep not in ALLOWED_CHARACTERS
+        # On Windows, paths have all sorts of weirdness which `_validate_path_component`
+        # does not consider. In any case, the remote media store can't work correctly
+        # for certain homeservers there, since ":"s aren't allowed in paths.
+        assert os.name == "posix"
+
+    @_wrap_with_jail_check
     def local_media_filepath_rel(self, media_id: str) -> str:
-        return os.path.join("local_content", media_id[0:2], media_id[2:4], media_id[4:])
+        return os.path.join(
+            "local_content",
+            _validate_path_component(media_id[0:2]),
+            _validate_path_component(media_id[2:4]),
+            _validate_path_component(media_id[4:]),
+        )
 
     local_media_filepath = _wrap_in_base_path(local_media_filepath_rel)
 
+    @_wrap_with_jail_check
     def local_media_thumbnail_rel(
         self, media_id: str, width: int, height: int, content_type: str, method: str
     ) -> str:
         top_level_type, sub_type = content_type.split("/")
         file_name = "%i-%i-%s-%s-%s" % (width, height, top_level_type, sub_type, method)
         return os.path.join(
-            "local_thumbnails", media_id[0:2], media_id[2:4], media_id[4:], file_name
+            "local_thumbnails",
+            _validate_path_component(media_id[0:2]),
+            _validate_path_component(media_id[2:4]),
+            _validate_path_component(media_id[4:]),
+            _validate_path_component(file_name),
         )
 
     local_media_thumbnail = _wrap_in_base_path(local_media_thumbnail_rel)
 
+    @_wrap_with_jail_check
     def local_media_thumbnail_dir(self, media_id: str) -> str:
         """
         Retrieve the local store path of thumbnails of a given media_id
@@ -76,18 +180,24 @@ def local_media_thumbnail_dir(self, media_id: str) -> str:
         return os.path.join(
             self.base_path,
             "local_thumbnails",
-            media_id[0:2],
-            media_id[2:4],
-            media_id[4:],
+            _validate_path_component(media_id[0:2]),
+            _validate_path_component(media_id[2:4]),
+            _validate_path_component(media_id[4:]),
         )
 
+    @_wrap_with_jail_check
     def remote_media_filepath_rel(self, server_name: str, file_id: str) -> str:
         return os.path.join(
-            "remote_content", server_name, file_id[0:2], file_id[2:4], file_id[4:]
+            "remote_content",
+            _validate_path_component(server_name),
+            _validate_path_component(file_id[0:2]),
+            _validate_path_component(file_id[2:4]),
+            _validate_path_component(file_id[4:]),
         )
 
     remote_media_filepath = _wrap_in_base_path(remote_media_filepath_rel)
 
+    @_wrap_with_jail_check
     def remote_media_thumbnail_rel(
         self,
         server_name: str,
@@ -101,11 +211,11 @@ def remote_media_thumbnail_rel(
         file_name = "%i-%i-%s-%s-%s" % (width, height, top_level_type, sub_type, method)
         return os.path.join(
             "remote_thumbnail",
-            server_name,
-            file_id[0:2],
-            file_id[2:4],
-            file_id[4:],
-            file_name,
+            _validate_path_component(server_name),
+            _validate_path_component(file_id[0:2]),
+            _validate_path_component(file_id[2:4]),
+            _validate_path_component(file_id[4:]),
+            _validate_path_component(file_name),
         )
 
     remote_media_thumbnail = _wrap_in_base_path(remote_media_thumbnail_rel)
@@ -113,6 +223,7 @@ def remote_media_thumbnail_rel(
     # Legacy path that was used to store thumbnails previously.
     # Should be removed after some time, when most of the thumbnails are stored
     # using the new path.
+    @_wrap_with_jail_check
     def remote_media_thumbnail_rel_legacy(
         self, server_name: str, file_id: str, width: int, height: int, content_type: str
     ) -> str:
@@ -120,43 +231,66 @@ def remote_media_thumbnail_rel_legacy(
         file_name = "%i-%i-%s-%s" % (width, height, top_level_type, sub_type)
         return os.path.join(
             "remote_thumbnail",
-            server_name,
-            file_id[0:2],
-            file_id[2:4],
-            file_id[4:],
-            file_name,
+            _validate_path_component(server_name),
+            _validate_path_component(file_id[0:2]),
+            _validate_path_component(file_id[2:4]),
+            _validate_path_component(file_id[4:]),
+            _validate_path_component(file_name),
         )
 
     def remote_media_thumbnail_dir(self, server_name: str, file_id: str) -> str:
         return os.path.join(
             self.base_path,
             "remote_thumbnail",
-            server_name,
-            file_id[0:2],
-            file_id[2:4],
-            file_id[4:],
+            _validate_path_component(server_name),
+            _validate_path_component(file_id[0:2]),
+            _validate_path_component(file_id[2:4]),
+            _validate_path_component(file_id[4:]),
         )
 
+    @_wrap_with_jail_check
     def url_cache_filepath_rel(self, media_id: str) -> str:
         if NEW_FORMAT_ID_RE.match(media_id):
             # Media id is of the form <DATE><RANDOM_STRING>
             # E.g.: 2017-09-28-fsdRDt24DS234dsf
-            return os.path.join("url_cache", media_id[:10], media_id[11:])
+            return os.path.join(
+                "url_cache",
+                _validate_path_component(media_id[:10]),
+                _validate_path_component(media_id[11:]),
+            )
         else:
-            return os.path.join("url_cache", media_id[0:2], media_id[2:4], media_id[4:])
+            return os.path.join(
+                "url_cache",
+                _validate_path_component(media_id[0:2]),
+                _validate_path_component(media_id[2:4]),
+                _validate_path_component(media_id[4:]),
+            )
 
     url_cache_filepath = _wrap_in_base_path(url_cache_filepath_rel)
 
+    @_wrap_with_jail_check
     def url_cache_filepath_dirs_to_delete(self, media_id: str) -> List[str]:
         "The dirs to try and remove if we delete the media_id file"
         if NEW_FORMAT_ID_RE.match(media_id):
-            return [os.path.join(self.base_path, "url_cache", media_id[:10])]
+            return [
+                os.path.join(
+                    self.base_path, "url_cache", _validate_path_component(media_id[:10])
+                )
+            ]
         else:
             return [
-                os.path.join(self.base_path, "url_cache", media_id[0:2], media_id[2:4]),
-                os.path.join(self.base_path, "url_cache", media_id[0:2]),
+                os.path.join(
+                    self.base_path,
+                    "url_cache",
+                    _validate_path_component(media_id[0:2]),
+                    _validate_path_component(media_id[2:4]),
+                ),
+                os.path.join(
+                    self.base_path, "url_cache", _validate_path_component(media_id[0:2])
+                ),
             ]
 
+    @_wrap_with_jail_check
     def url_cache_thumbnail_rel(
         self, media_id: str, width: int, height: int, content_type: str, method: str
     ) -> str:
@@ -168,37 +302,46 @@ def url_cache_thumbnail_rel(
 
         if NEW_FORMAT_ID_RE.match(media_id):
             return os.path.join(
-                "url_cache_thumbnails", media_id[:10], media_id[11:], file_name
+                "url_cache_thumbnails",
+                _validate_path_component(media_id[:10]),
+                _validate_path_component(media_id[11:]),
+                _validate_path_component(file_name),
             )
         else:
             return os.path.join(
                 "url_cache_thumbnails",
-                media_id[0:2],
-                media_id[2:4],
-                media_id[4:],
-                file_name,
+                _validate_path_component(media_id[0:2]),
+                _validate_path_component(media_id[2:4]),
+                _validate_path_component(media_id[4:]),
+                _validate_path_component(file_name),
             )
 
     url_cache_thumbnail = _wrap_in_base_path(url_cache_thumbnail_rel)
 
+    @_wrap_with_jail_check
     def url_cache_thumbnail_directory_rel(self, media_id: str) -> str:
         # Media id is of the form <DATE><RANDOM_STRING>
         # E.g.: 2017-09-28-fsdRDt24DS234dsf
 
         if NEW_FORMAT_ID_RE.match(media_id):
-            return os.path.join("url_cache_thumbnails", media_id[:10], media_id[11:])
+            return os.path.join(
+                "url_cache_thumbnails",
+                _validate_path_component(media_id[:10]),
+                _validate_path_component(media_id[11:]),
+            )
         else:
             return os.path.join(
                 "url_cache_thumbnails",
-                media_id[0:2],
-                media_id[2:4],
-                media_id[4:],
+                _validate_path_component(media_id[0:2]),
+                _validate_path_component(media_id[2:4]),
+                _validate_path_component(media_id[4:]),
             )
 
     url_cache_thumbnail_directory = _wrap_in_base_path(
         url_cache_thumbnail_directory_rel
     )
 
+    @_wrap_with_jail_check
     def url_cache_thumbnail_dirs_to_delete(self, media_id: str) -> List[str]:
         "The dirs to try and remove if we delete the media_id thumbnails"
         # Media id is of the form <DATE><RANDOM_STRING>
@@ -206,21 +349,35 @@ def url_cache_thumbnail_dirs_to_delete(self, media_id: str) -> List[str]:
         if NEW_FORMAT_ID_RE.match(media_id):
             return [
                 os.path.join(
-                    self.base_path, "url_cache_thumbnails", media_id[:10], media_id[11:]
+                    self.base_path,
+                    "url_cache_thumbnails",
+                    _validate_path_component(media_id[:10]),
+                    _validate_path_component(media_id[11:]),
+                ),
+                os.path.join(
+                    self.base_path,
+                    "url_cache_thumbnails",
+                    _validate_path_component(media_id[:10]),
                 ),
-                os.path.join(self.base_path, "url_cache_thumbnails", media_id[:10]),
             ]
         else:
             return [
                 os.path.join(
                     self.base_path,
                     "url_cache_thumbnails",
-                    media_id[0:2],
-                    media_id[2:4],
-                    media_id[4:],
+                    _validate_path_component(media_id[0:2]),
+                    _validate_path_component(media_id[2:4]),
+                    _validate_path_component(media_id[4:]),
                 ),
                 os.path.join(
-                    self.base_path, "url_cache_thumbnails", media_id[0:2], media_id[2:4]
+                    self.base_path,
+                    "url_cache_thumbnails",
+                    _validate_path_component(media_id[0:2]),
+                    _validate_path_component(media_id[2:4]),
+                ),
+                os.path.join(
+                    self.base_path,
+                    "url_cache_thumbnails",
+                    _validate_path_component(media_id[0:2]),
                 ),
-                os.path.join(self.base_path, "url_cache_thumbnails", media_id[0:2]),
             ]
diff --git a/synapse/util/stringutils.py b/synapse/util/stringutils.py
index f0294321912..ea1032b4fcf 100644
--- a/synapse/util/stringutils.py
+++ b/synapse/util/stringutils.py
@@ -19,6 +19,8 @@
 from collections.abc import Iterable
 from typing import Optional, Tuple
 
+from netaddr import valid_ipv6
+
 from synapse.api.errors import Codes, SynapseError
 
 _string_with_symbols = string.digits + string.ascii_letters + ".,;:^&*-_+=#~@"
@@ -97,7 +99,10 @@ def parse_server_name(server_name: str) -> Tuple[str, Optional[int]]:
         raise ValueError("Invalid server name '%s'" % server_name)
 
 
-VALID_HOST_REGEX = re.compile("\\A[0-9a-zA-Z.-]+\\Z")
+# An approximation of the domain name syntax in RFC 1035, section 2.3.1.
+# NB: "\Z" is not equivalent to "$".
+#     The latter will match the position before a "\n" at the end of a string.
+VALID_HOST_REGEX = re.compile("\\A[0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*\\Z")
 
 
 def parse_and_validate_server_name(server_name: str) -> Tuple[str, Optional[int]]:
@@ -122,13 +127,15 @@ def parse_and_validate_server_name(server_name: str) -> Tuple[str, Optional[int]
     if host[0] == "[":
         if host[-1] != "]":
             raise ValueError("Mismatched [...] in server name '%s'" % (server_name,))
-        return host, port
 
-    # otherwise it should only be alphanumerics.
-    if not VALID_HOST_REGEX.match(host):
-        raise ValueError(
-            "Server name '%s' contains invalid characters" % (server_name,)
-        )
+        # valid_ipv6 raises when given an empty string
+        ipv6_address = host[1:-1]
+        if not ipv6_address or not valid_ipv6(ipv6_address):
+            raise ValueError(
+                "Server name '%s' is not a valid IPv6 address" % (server_name,)
+            )
+    elif not VALID_HOST_REGEX.match(host):
+        raise ValueError("Server name '%s' has an invalid format" % (server_name,))
 
     return host, port
 
diff --git a/tests/http/test_endpoint.py b/tests/http/test_endpoint.py
index 1f9a2f9b1d3..c8cc21cadd2 100644
--- a/tests/http/test_endpoint.py
+++ b/tests/http/test_endpoint.py
@@ -36,8 +36,11 @@ def test_validate_bad_server_names(self):
             "localhost:http",  # non-numeric port
             "1234]",  # smells like ipv6 literal but isn't
             "[1234",
+            "[1.2.3.4]",
             "underscore_.com",
             "percent%65.com",
+            "newline.com\n",
+            ".empty-label.com",
             "1234:5678:80",  # too many colons
         ]
         for i in test_data:
diff --git a/tests/rest/media/v1/test_filepath.py b/tests/rest/media/v1/test_filepath.py
index 09504a485f7..8fe94f7d853 100644
--- a/tests/rest/media/v1/test_filepath.py
+++ b/tests/rest/media/v1/test_filepath.py
@@ -11,6 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import inspect
+from typing import Iterable
+
 from synapse.rest.media.v1.filepath import MediaFilePaths
 
 from tests import unittest
@@ -236,3 +239,250 @@ def test_url_cache_thumbnail_dirs_to_delete_legacy(self):
                 "/media_store/url_cache_thumbnails/Ge",
             ],
         )
+
+    def test_server_name_validation(self):
+        """Test validation of server names"""
+        self._test_path_validation(
+            [
+                "remote_media_filepath_rel",
+                "remote_media_filepath",
+                "remote_media_thumbnail_rel",
+                "remote_media_thumbnail",
+                "remote_media_thumbnail_rel_legacy",
+                "remote_media_thumbnail_dir",
+            ],
+            parameter="server_name",
+            valid_values=[
+                "matrix.org",
+                "matrix.org:8448",
+                "matrix-federation.matrix.org",
+                "matrix-federation.matrix.org:8448",
+                "10.1.12.123",
+                "10.1.12.123:8448",
+                "[fd00:abcd::ffff]",
+                "[fd00:abcd::ffff]:8448",
+            ],
+            invalid_values=[
+                "/matrix.org",
+                "matrix.org/..",
+                "matrix.org\x00",
+                "",
+                ".",
+                "..",
+                "/",
+            ],
+        )
+
+    def test_file_id_validation(self):
+        """Test validation of local, remote and legacy URL cache file / media IDs"""
+        # File / media IDs get split into three parts to form paths, consisting of the
+        # first two characters, next two characters and rest of the ID.
+        valid_file_ids = [
+            "GerZNDnDZVjsOtardLuwfIBg",
+            # Unexpected, but produces an acceptable path:
+            "GerZN",  # "N" becomes the last directory
+        ]
+        invalid_file_ids = [
+            "/erZNDnDZVjsOtardLuwfIBg",
+            "Ge/ZNDnDZVjsOtardLuwfIBg",
+            "GerZ/DnDZVjsOtardLuwfIBg",
+            "GerZ/..",
+            "G\x00rZNDnDZVjsOtardLuwfIBg",
+            "Ger\x00NDnDZVjsOtardLuwfIBg",
+            "GerZNDnDZVjsOtardLuwfIBg\x00",
+            "",
+            "Ge",
+            "GerZ",
+            "GerZ.",
+            "..rZNDnDZVjsOtardLuwfIBg",
+            "Ge..NDnDZVjsOtardLuwfIBg",
+            "GerZ..",
+            "GerZ/",
+        ]
+
+        self._test_path_validation(
+            [
+                "local_media_filepath_rel",
+                "local_media_filepath",
+                "local_media_thumbnail_rel",
+                "local_media_thumbnail",
+                "local_media_thumbnail_dir",
+                # Legacy URL cache media IDs
+                "url_cache_filepath_rel",
+                "url_cache_filepath",
+                # `url_cache_filepath_dirs_to_delete` is tested below.
+                "url_cache_thumbnail_rel",
+                "url_cache_thumbnail",
+                "url_cache_thumbnail_directory_rel",
+                "url_cache_thumbnail_directory",
+                "url_cache_thumbnail_dirs_to_delete",
+            ],
+            parameter="media_id",
+            valid_values=valid_file_ids,
+            invalid_values=invalid_file_ids,
+        )
+
+        # `url_cache_filepath_dirs_to_delete` ignores what would be the last path
+        # component, so only the first 4 characters matter.
+        self._test_path_validation(
+            [
+                "url_cache_filepath_dirs_to_delete",
+            ],
+            parameter="media_id",
+            valid_values=valid_file_ids,
+            invalid_values=[
+                "/erZNDnDZVjsOtardLuwfIBg",
+                "Ge/ZNDnDZVjsOtardLuwfIBg",
+                "G\x00rZNDnDZVjsOtardLuwfIBg",
+                "Ger\x00NDnDZVjsOtardLuwfIBg",
+                "",
+                "Ge",
+                "..rZNDnDZVjsOtardLuwfIBg",
+                "Ge..NDnDZVjsOtardLuwfIBg",
+            ],
+        )
+
+        self._test_path_validation(
+            [
+                "remote_media_filepath_rel",
+                "remote_media_filepath",
+                "remote_media_thumbnail_rel",
+                "remote_media_thumbnail",
+                "remote_media_thumbnail_rel_legacy",
+                "remote_media_thumbnail_dir",
+            ],
+            parameter="file_id",
+            valid_values=valid_file_ids,
+            invalid_values=invalid_file_ids,
+        )
+
+    def test_url_cache_media_id_validation(self):
+        """Test validation of URL cache media IDs"""
+        self._test_path_validation(
+            [
+                "url_cache_filepath_rel",
+                "url_cache_filepath",
+                # `url_cache_filepath_dirs_to_delete` only cares about the date prefix
+                "url_cache_thumbnail_rel",
+                "url_cache_thumbnail",
+                "url_cache_thumbnail_directory_rel",
+                "url_cache_thumbnail_directory",
+                "url_cache_thumbnail_dirs_to_delete",
+            ],
+            parameter="media_id",
+            valid_values=[
+                "2020-01-02_GerZNDnDZVjsOtar",
+                "2020-01-02_G",  # Unexpected, but produces an acceptable path
+            ],
+            invalid_values=[
+                "2020-01-02",
+                "2020-01-02-",
+                "2020-01-02-.",
+                "2020-01-02-..",
+                "2020-01-02-/",
+                "2020-01-02-/GerZNDnDZVjsOtar",
+                "2020-01-02-GerZNDnDZVjsOtar/..",
+                "2020-01-02-GerZNDnDZVjsOtar\x00",
+            ],
+        )
+
+    def test_content_type_validation(self):
+        """Test validation of thumbnail content types"""
+        self._test_path_validation(
+            [
+                "local_media_thumbnail_rel",
+                "local_media_thumbnail",
+                "remote_media_thumbnail_rel",
+                "remote_media_thumbnail",
+                "remote_media_thumbnail_rel_legacy",
+                "url_cache_thumbnail_rel",
+                "url_cache_thumbnail",
+            ],
+            parameter="content_type",
+            valid_values=[
+                "image/jpeg",
+            ],
+            invalid_values=[
+                "",  # ValueError: not enough values to unpack
+                "image/jpeg/abc",  # ValueError: too many values to unpack
+                "image/jpeg\x00",
+            ],
+        )
+
+    def test_thumbnail_method_validation(self):
+        """Test validation of thumbnail methods"""
+        self._test_path_validation(
+            [
+                "local_media_thumbnail_rel",
+                "local_media_thumbnail",
+                "remote_media_thumbnail_rel",
+                "remote_media_thumbnail",
+                "url_cache_thumbnail_rel",
+                "url_cache_thumbnail",
+            ],
+            parameter="method",
+            valid_values=[
+                "crop",
+                "scale",
+            ],
+            invalid_values=[
+                "/scale",
+                "scale/..",
+                "scale\x00",
+                "/",
+            ],
+        )
+
+    def _test_path_validation(
+        self,
+        methods: Iterable[str],
+        parameter: str,
+        valid_values: Iterable[str],
+        invalid_values: Iterable[str],
+    ):
+        """Test that the specified methods validate the named parameter as expected
+
+        Args:
+            methods: The names of `MediaFilePaths` methods to test
+            parameter: The name of the parameter to test
+            valid_values: A list of parameter values that are expected to be accepted
+            invalid_values: A list of parameter values that are expected to be rejected
+
+        Raises:
+            AssertionError: If a value was accepted when it should have failed
+                validation.
+            ValueError: If a value failed validation when it should have been accepted.
+        """
+        for method in methods:
+            get_path = getattr(self.filepaths, method)
+
+            parameters = inspect.signature(get_path).parameters
+            kwargs = {
+                "server_name": "matrix.org",
+                "media_id": "GerZNDnDZVjsOtardLuwfIBg",
+                "file_id": "GerZNDnDZVjsOtardLuwfIBg",
+                "width": 800,
+                "height": 600,
+                "content_type": "image/jpeg",
+                "method": "scale",
+            }
+
+            if get_path.__name__.startswith("url_"):
+                kwargs["media_id"] = "2020-01-02_GerZNDnDZVjsOtar"
+
+            kwargs = {k: v for k, v in kwargs.items() if k in parameters}
+            kwargs.pop(parameter)
+
+            for value in valid_values:
+                kwargs[parameter] = value
+                get_path(**kwargs)
+                # No exception should be raised
+
+            for value in invalid_values:
+                with self.assertRaises(ValueError):
+                    kwargs[parameter] = value
+                    path_or_list = get_path(**kwargs)
+                    self.fail(
+                        f"{value!r} unexpectedly passed validation: "
+                        f"{method} returned {path_or_list!r}"
+                    )

From a9c146ac53a40d191c0a2b3b1b8f6ceaea0eee79 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Wed, 15 Dec 2021 13:08:24 +0000
Subject: [PATCH 158/278] Disable aggregation bundling on `/sync` responses

A partial revert of #11478. This turns out to have had a significant CPU impact
on initial-sync handling. For now, let's disable it until we find a more
efficient way of achieving this.
---
 changelog.d/11583.bugfix    | 1 +
 synapse/rest/client/sync.py | 8 +++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/11583.bugfix

diff --git a/changelog.d/11583.bugfix b/changelog.d/11583.bugfix
new file mode 100644
index 00000000000..d2ed113e212
--- /dev/null
+++ b/changelog.d/11583.bugfix
@@ -0,0 +1 @@
+Fix a performance regression in `/sync` handling, introduced in 1.49.0.
diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py
index 88e4f5e0630..e556ff93e67 100644
--- a/synapse/rest/client/sync.py
+++ b/synapse/rest/client/sync.py
@@ -522,7 +522,13 @@ def serialize(events: Iterable[EventBase]) -> Awaitable[List[JsonDict]]:
                 time_now=time_now,
                 # Don't bother to bundle aggregations if the timeline is unlimited,
                 # as clients will have all the necessary information.
-                bundle_aggregations=room.timeline.limited,
+                # bundle_aggregations=room.timeline.limited,
+                #
+                # richvdh 2021-12-15: disable this temporarily as it has too high an
+                # overhead for initialsyncs. We need to figure out a way that the
+                # bundling can be done *before* the events are stored in the
+                # SyncResponseCache so that this part can be synchronous.
+                bundle_aggregations=False,
                 token_id=token_id,
                 event_format=event_formatter,
                 only_event_fields=only_fields,

From 99e7fb1d52204a7b7cae9f2d9e0a51e1febf8e01 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Fri, 7 Jan 2022 11:53:28 +0000
Subject: [PATCH 159/278] Wrap connection.commit with OpenTracing

This is an attempt to diagnose poor apdex levels, per
https://github.com/matrix-org/internal-config/issues/1181
---
 synapse/storage/database.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index 2cacc7dd6c5..3f848ca86fe 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -50,6 +50,7 @@
     current_context,
     make_deferred_yieldable,
 )
+from synapse.logging.opentracing import trace
 from synapse.metrics import register_threadpool
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.storage.background_updates import BackgroundUpdater
@@ -104,8 +105,20 @@ def _on_new_connection(conn):
         # Ensure we have a logging context so we can correctly track queries,
         # etc.
         with LoggingContext("db.on_new_connection"):
+            # HACK Patch the connection's commit function so that we can see
+            #      how long it's taking from Jaeger.
+            class NastyConnectionWrapper:
+                def __init__(self, connection):
+                    self._connection = connection
+                    self.commit = trace(connection.commit, "db.conn.commit")
+
+                def __getattr__(self, item):
+                    return getattr(self._connection, item)
+
             engine.on_new_connection(
-                LoggingDatabaseConnection(conn, engine, "on_new_connection")
+                LoggingDatabaseConnection(
+                    NastyConnectionWrapper(conn), engine, "on_new_connection"
+                )
             )
 
     connection_pool = adbapi.ConnectionPool(

From 5cc41f1b05416954f4c9e7aea1df308f4a451abe Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Fri, 7 Jan 2022 12:49:54 +0000
Subject: [PATCH 160/278] Updates to opentracing hackery
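
The interesting part of this change is that psycopg2's Connection class is
implemented in C and cannot be monkey-patched directly, so the DB-API module's
`connect()` is wrapped instead and hands out a proxy object whose `commit()` is
instrumented. A rough, generic illustration of that pattern, using the stdlib
sqlite3 module purely as a stand-in (this is not Synapse code):

    import sqlite3
    import time

    class TimedConnection:
        """Proxy that times commit() and delegates everything else."""

        def __init__(self, connection):
            self._connection = connection

        def commit(self):
            start = time.monotonic()
            try:
                self._connection.commit()
            finally:
                print(f"commit took {time.monotonic() - start:.6f}s")

        def __getattr__(self, item):
            return getattr(self._connection, item)

    _real_connect = sqlite3.connect

    def _wrapped_connect(*args, **kwargs):
        return TimedConnection(_real_connect(*args, **kwargs))

    sqlite3.connect = _wrapped_connect

    # Usage:
    #   conn = sqlite3.connect(":memory:")
    #   conn.execute("CREATE TABLE t (x INTEGER)")
    #   conn.commit()  # prints the commit duration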

---
 synapse/storage/database.py | 49 ++++++++++++++++++++++++++++---------
 1 file changed, 37 insertions(+), 12 deletions(-)

diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index 3f848ca86fe..b62719e1efe 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -41,6 +41,7 @@
 from typing_extensions import Literal
 
 from twisted.enterprise import adbapi
+from twisted.python import reflect
 
 from synapse.api.errors import StoreError
 from synapse.config.database import DatabaseConnectionConfig
@@ -50,7 +51,6 @@
     current_context,
     make_deferred_yieldable,
 )
-from synapse.logging.opentracing import trace
 from synapse.metrics import register_threadpool
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.storage.background_updates import BackgroundUpdater
@@ -91,6 +91,20 @@
 }
 
 
+class NastyConnectionWrapper:
+    def __init__(self, connection):
+        self._connection = connection
+        self._synapse_parent_context = None
+
+    def commit(self, *args, **kwargs):
+        with LoggingContext("db_commit", parent_context = self._synapse_parent_context):
+            with opentracing.start_active_span("db.conn.commit"):
+                self._connection.commit(*args, **kwargs)
+
+    def __getattr__(self, item):
+        return getattr(self._connection, item)
+
+
 def make_pool(
     reactor, db_config: DatabaseConnectionConfig, engine: BaseDatabaseEngine
 ) -> adbapi.ConnectionPool:
@@ -105,22 +119,29 @@ def _on_new_connection(conn):
         # Ensure we have a logging context so we can correctly track queries,
         # etc.
         with LoggingContext("db.on_new_connection"):
-            # HACK Patch the connection's commit function so that we can see
-            #      how long it's taking from Jaeger.
-            class NastyConnectionWrapper:
-                def __init__(self, connection):
-                    self._connection = connection
-                    self.commit = trace(connection.commit, "db.conn.commit")
-
-                def __getattr__(self, item):
-                    return getattr(self._connection, item)
-
             engine.on_new_connection(
                 LoggingDatabaseConnection(
-                    NastyConnectionWrapper(conn), engine, "on_new_connection"
+                    conn, engine, "on_new_connection"
                 )
             )
 
+    # HACK Patch the connection's commit function so that we can see
+    #      how long it's taking from Jaeger. To do that, we need to patch the
+    #      dbapi module's 'connect' method so that it returns a wrapped 'Connection'
+    #      object to the connection pool. (psycopg2's Connection class is a C thing
+    #      which we can't monkey-patch directly).
+    dbapiname = db_config.config["name"]
+    dbapi = reflect.namedModule(dbapiname)
+    if not getattr(dbapi, "_synapse_wrapped_dbapi", False):
+        real_connect = dbapi.connect
+
+        def wrapped_connect(*args, **kwargs):
+            conn = real_connect(*args, **kwargs)
+            return NastyConnectionWrapper(conn)
+
+        dbapi.connect = wrapped_connect
+        dbapi._synapse_wrapped_dbapi = True
+
     connection_pool = adbapi.ConnectionPool(
         db_config.config["name"],
         cp_reactor=reactor,
@@ -813,6 +834,10 @@ def inner_func(conn, *args, **kwargs):
             # pool).
             assert not self.engine.in_transaction(conn)
 
+            # HACK: record the parent context in 'conn' so that we can tie later commits
+            #    back to it
+            conn._connection._synapse_parent_context = parent_context
+
             with LoggingContext(
                 str(curr_context), parent_context=parent_context
             ) as context:

From 2b9f741f3a9dee93e9744774b342c35ce60062c4 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Sun, 23 Jan 2022 21:08:52 +0000
Subject: [PATCH 161/278] Fix logic for dropping old events in fed queue

Fixes infinite loops of

> logger.info("Invalid prev_events for %s", event_id)

caused by the length check being applied to the whole `prev_events` list rather
than to the individual `prev_event_tuple`.
---
 synapse/storage/databases/main/event_federation.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index 270b30800bf..a556f17dac1 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -1432,7 +1432,10 @@ async def prune_staged_events_in_room(
 
             if room_version.event_format == EventFormatVersions.V1:
                 for prev_event_tuple in prev_events:
-                    if not isinstance(prev_event_tuple, list) or len(prev_events) != 2:
+                    if (
+                        not isinstance(prev_event_tuple, list)
+                        or len(prev_event_tuple) != 2
+                    ):
                         logger.info("Invalid prev_events for %s", event_id)
                         break
 

From 3bf466698fcc5b7038878b6549e584e1e8573f56 Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Fri, 4 Feb 2022 10:54:35 +0000
Subject: [PATCH 162/278] Hotfixes: Revert `commit()` OpenTracing hackery
 (#11906)

---
 synapse/storage/database.py | 40 +------------------------------------
 1 file changed, 1 insertion(+), 39 deletions(-)

diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index f1d975a8ff9..99802228c9f 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -41,7 +41,6 @@
 from typing_extensions import Literal
 
 from twisted.enterprise import adbapi
-from twisted.python import reflect
 
 from synapse.api.errors import StoreError
 from synapse.config.database import DatabaseConnectionConfig
@@ -91,20 +90,6 @@
 }
 
 
-class NastyConnectionWrapper:
-    def __init__(self, connection):
-        self._connection = connection
-        self._synapse_parent_context = None
-
-    def commit(self, *args, **kwargs):
-        with LoggingContext("db_commit", parent_context = self._synapse_parent_context):
-            with opentracing.start_active_span("db.conn.commit"):
-                self._connection.commit(*args, **kwargs)
-
-    def __getattr__(self, item):
-        return getattr(self._connection, item)
-
-
 def make_pool(
     reactor, db_config: DatabaseConnectionConfig, engine: BaseDatabaseEngine
 ) -> adbapi.ConnectionPool:
@@ -120,28 +105,9 @@ def _on_new_connection(conn):
         # etc.
         with LoggingContext("db.on_new_connection"):
             engine.on_new_connection(
-                LoggingDatabaseConnection(
-                    conn, engine, "on_new_connection"
-                )
+                LoggingDatabaseConnection(conn, engine, "on_new_connection")
             )
 
-    # HACK Patch the connection's commit function so that we can see
-    #      how long it's taking from Jaeger. To do that, we need to patch the
-    #      dbapi module's 'connect' method so that it returns a wrapped 'Connection'
-    #      object to the connection pool. (psycopg2's Connection class is a C thing
-    #      which we can't monkey-patch directly).
-    dbapiname = db_config.config["name"]
-    dbapi = reflect.namedModule(dbapiname)
-    if not getattr(dbapi, "_synapse_wrapped_dbapi", False):
-        real_connect = dbapi.connect
-
-        def wrapped_connect(*args, **kwargs):
-            conn = real_connect(*args, **kwargs)
-            return NastyConnectionWrapper(conn)
-
-        dbapi.connect = wrapped_connect
-        dbapi._synapse_wrapped_dbapi = True
-
     connection_pool = adbapi.ConnectionPool(
         db_config.config["name"],
         cp_reactor=reactor,
@@ -839,10 +805,6 @@ def inner_func(conn, *args, **kwargs):
             # pool).
             assert not self.engine.in_transaction(conn)
 
-            # HACK: record the parent context in 'conn' so that we can tie later commits
-            #    back to it
-            conn._connection._synapse_parent_context = parent_context
-
             with LoggingContext(
                 str(curr_context), parent_context=parent_context
             ) as context:

From ed2f158a380e31429759521883de95ee5cbc25bb Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 8 Feb 2022 12:57:17 +0000
Subject: [PATCH 163/278] Printf debugging for MSISDN validation (#11882)

This also makes some attempt to keep CI happy, but it probably ought not to.
---
 synapse/handlers/message.py    | 4 ++--
 synapse/rest/client/account.py | 3 +++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 4411d901ac4..8f44af2d926 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -277,8 +277,8 @@ async def get_joined_members(self, requester: Requester, room_id: str) -> dict:
         # If this is an AS, double check that they are allowed to see the members.
         # This can either be because the AS user is in the room or because there
         # is a user in the room that the AS is "interested in"
-        if False and requester.app_service and user_id not in users_with_profile:
-            for uid in users_with_profile:
+        if False and requester.app_service and user_id not in users_with_profile:  # type: ignore[unreachable]
+            for uid in users_with_profile:  # type: ignore[unreachable]
                 if requester.app_service.is_interested_in_user(uid):
                     break
             else:
diff --git a/synapse/rest/client/account.py b/synapse/rest/client/account.py
index 6b272658fc3..d9cb55e35b8 100644
--- a/synapse/rest/client/account.py
+++ b/synapse/rest/client/account.py
@@ -467,6 +467,7 @@ async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
         next_link = body.get("next_link")  # Optional param
 
         msisdn = phone_number_to_msisdn(country, phone_number)
+        logger.info("Request #%s to verify ownership of %s", send_attempt, msisdn)
 
         if not check_3pid_allowed(self.hs, "msisdn", msisdn):
             raise SynapseError(
@@ -494,6 +495,7 @@ async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
                 await self.hs.get_clock().sleep(random.randint(1, 10) / 10)
                 return 200, {"sid": random_string(16)}
 
+            logger.info("MSISDN %s is already in use by %s", msisdn, existing_user_id)
             raise SynapseError(400, "MSISDN is already in use", Codes.THREEPID_IN_USE)
 
         if not self.hs.config.registration.account_threepid_delegate_msisdn:
@@ -518,6 +520,7 @@ async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
         threepid_send_requests.labels(type="msisdn", reason="add_threepid").observe(
             send_attempt
         )
+        logger.info("MSISDN %s: got response from identity server: %s", msisdn, ret)
 
         return 200, ret
 

From f1b625ad56d539305e64f529bcfd25915e5d1be5 Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Tue, 1 Mar 2022 15:30:22 +0000
Subject: [PATCH 164/278] matrix.org hotfixes: Back out in-flight state cache
 changes (#12117)

---
 changelog.d/10870.misc                      |   1 -
 changelog.d/11608.misc                      |   1 -
 synapse/storage/databases/state/store.py    | 219 ++----------
 tests/storage/databases/test_state_store.py | 352 --------------------
 4 files changed, 25 insertions(+), 548 deletions(-)
 delete mode 100644 changelog.d/10870.misc
 delete mode 100644 changelog.d/11608.misc
 delete mode 100644 tests/storage/databases/test_state_store.py

diff --git a/changelog.d/10870.misc b/changelog.d/10870.misc
deleted file mode 100644
index 3af049b9696..00000000000
--- a/changelog.d/10870.misc
+++ /dev/null
@@ -1 +0,0 @@
-Deduplicate in-flight requests in `_get_state_for_groups`.
diff --git a/changelog.d/11608.misc b/changelog.d/11608.misc
deleted file mode 100644
index 3af049b9696..00000000000
--- a/changelog.d/11608.misc
+++ /dev/null
@@ -1 +0,0 @@
-Deduplicate in-flight requests in `_get_state_for_groups`.
diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py
index b8016f679a7..7614d76ac64 100644
--- a/synapse/storage/databases/state/store.py
+++ b/synapse/storage/databases/state/store.py
@@ -13,23 +13,11 @@
 # limitations under the License.
 
 import logging
-from typing import (
-    TYPE_CHECKING,
-    Collection,
-    Dict,
-    Iterable,
-    Optional,
-    Sequence,
-    Set,
-    Tuple,
-)
+from typing import TYPE_CHECKING, Collection, Dict, Iterable, List, Optional, Set, Tuple
 
 import attr
 
-from twisted.internet import defer
-
 from synapse.api.constants import EventTypes
-from synapse.logging.context import make_deferred_yieldable, run_in_background
 from synapse.storage._base import SQLBaseStore
 from synapse.storage.database import (
     DatabasePool,
@@ -41,12 +29,6 @@
 from synapse.storage.types import Cursor
 from synapse.storage.util.sequence import build_sequence_generator
 from synapse.types import MutableStateMap, StateKey, StateMap
-from synapse.util import unwrapFirstError
-from synapse.util.async_helpers import (
-    AbstractObservableDeferred,
-    ObservableDeferred,
-    yieldable_gather_results,
-)
 from synapse.util.caches.descriptors import cached
 from synapse.util.caches.dictionary_cache import DictionaryCache
 
@@ -55,8 +37,8 @@
 
 logger = logging.getLogger(__name__)
 
+
 MAX_STATE_DELTA_HOPS = 100
-MAX_INFLIGHT_REQUESTS_PER_GROUP = 5
 
 
 @attr.s(slots=True, frozen=True, auto_attribs=True)
@@ -124,12 +106,6 @@ def __init__(
             500000,
         )
 
-        # Current ongoing get_state_for_groups in-flight requests
-        # {group ID -> {StateFilter -> ObservableDeferred}}
-        self._state_group_inflight_requests: Dict[
-            int, Dict[StateFilter, AbstractObservableDeferred[StateMap[str]]]
-        ] = {}
-
         def get_max_state_group_txn(txn: Cursor) -> int:
             txn.execute("SELECT COALESCE(max(id), 0) FROM state_groups")
             return txn.fetchone()[0]  # type: ignore
@@ -181,7 +157,7 @@ def _get_state_group_delta_txn(txn: LoggingTransaction) -> _GetStateGroupDelta:
         )
 
     async def _get_state_groups_from_groups(
-        self, groups: Sequence[int], state_filter: StateFilter
+        self, groups: List[int], state_filter: StateFilter
     ) -> Dict[int, StateMap[str]]:
         """Returns the state groups for a given set of groups from the
         database, filtering on types of state events.
@@ -252,165 +228,6 @@ def _get_state_for_group_using_cache(
 
         return state_filter.filter_state(state_dict_ids), not missing_types
 
-    def _get_state_for_group_gather_inflight_requests(
-        self, group: int, state_filter_left_over: StateFilter
-    ) -> Tuple[Sequence[AbstractObservableDeferred[StateMap[str]]], StateFilter]:
-        """
-        Attempts to gather in-flight requests and re-use them to retrieve state
-        for the given state group, filtered with the given state filter.
-
-        If there are more than MAX_INFLIGHT_REQUESTS_PER_GROUP in-flight requests,
-        and there *still* isn't enough information to complete the request by solely
-        reusing others, a full state filter will be requested to ensure that subsequent
-        requests can reuse this request.
-
-        Used as part of _get_state_for_group_using_inflight_cache.
-
-        Returns:
-            Tuple of two values:
-                A sequence of ObservableDeferreds to observe
-                A StateFilter representing what else needs to be requested to fulfill the request
-        """
-
-        inflight_requests = self._state_group_inflight_requests.get(group)
-        if inflight_requests is None:
-            # no requests for this group, need to retrieve it all ourselves
-            return (), state_filter_left_over
-
-        # The list of ongoing requests which will help narrow the current request.
-        reusable_requests = []
-        for (request_state_filter, request_deferred) in inflight_requests.items():
-            new_state_filter_left_over = state_filter_left_over.approx_difference(
-                request_state_filter
-            )
-            if new_state_filter_left_over == state_filter_left_over:
-                # Reusing this request would not gain us anything, so don't bother.
-                continue
-
-            reusable_requests.append(request_deferred)
-            state_filter_left_over = new_state_filter_left_over
-            if state_filter_left_over == StateFilter.none():
-                # we have managed to collect enough of the in-flight requests
-                # to cover our StateFilter and give us the state we need.
-                break
-
-        if (
-            state_filter_left_over != StateFilter.none()
-            and len(inflight_requests) >= MAX_INFLIGHT_REQUESTS_PER_GROUP
-        ):
-            # There are too many requests for this group.
-            # To prevent even more from building up, we request the whole
-            # state filter to guarantee that we can be reused by any subsequent
-            # requests for this state group.
-            return (), StateFilter.all()
-
-        return reusable_requests, state_filter_left_over
-
-    async def _get_state_for_group_fire_request(
-        self, group: int, state_filter: StateFilter
-    ) -> StateMap[str]:
-        """
-        Fires off a request to get the state at a state group,
-        potentially filtering by type and/or state key.
-
-        This request will be tracked in the in-flight request cache and automatically
-        removed when it is finished.
-
-        Used as part of _get_state_for_group_using_inflight_cache.
-
-        Args:
-            group: ID of the state group for which we want to get state
-            state_filter: the state filter used to fetch state from the database
-        """
-        cache_sequence_nm = self._state_group_cache.sequence
-        cache_sequence_m = self._state_group_members_cache.sequence
-
-        # Help the cache hit ratio by expanding the filter a bit
-        db_state_filter = state_filter.return_expanded()
-
-        async def _the_request() -> StateMap[str]:
-            group_to_state_dict = await self._get_state_groups_from_groups(
-                (group,), state_filter=db_state_filter
-            )
-
-            # Now let's update the caches
-            self._insert_into_cache(
-                group_to_state_dict,
-                db_state_filter,
-                cache_seq_num_members=cache_sequence_m,
-                cache_seq_num_non_members=cache_sequence_nm,
-            )
-
-            # Remove ourselves from the in-flight cache
-            group_request_dict = self._state_group_inflight_requests[group]
-            del group_request_dict[db_state_filter]
-            if not group_request_dict:
-                # If there are no more requests in-flight for this group,
-                # clean up the cache by removing the empty dictionary
-                del self._state_group_inflight_requests[group]
-
-            return group_to_state_dict[group]
-
-        # We don't immediately await the result, so must use run_in_background
-        # But we DO await the result before the current log context (request)
-        # finishes, so don't need to run it as a background process.
-        request_deferred = run_in_background(_the_request)
-        observable_deferred = ObservableDeferred(request_deferred, consumeErrors=True)
-
-        # Insert the ObservableDeferred into the cache
-        group_request_dict = self._state_group_inflight_requests.setdefault(group, {})
-        group_request_dict[db_state_filter] = observable_deferred
-
-        return await make_deferred_yieldable(observable_deferred.observe())
-
-    async def _get_state_for_group_using_inflight_cache(
-        self, group: int, state_filter: StateFilter
-    ) -> MutableStateMap[str]:
-        """
-        Gets the state at a state group, potentially filtering by type and/or
-        state key.
-
-        1. Calls _get_state_for_group_gather_inflight_requests to gather any
-           ongoing requests which might overlap with the current request.
-        2. Fires a new request, using _get_state_for_group_fire_request,
-           for any state which cannot be gathered from ongoing requests.
-
-        Args:
-            group: ID of the state group for which we want to get state
-            state_filter: the state filter used to fetch state from the database
-        Returns:
-            state map
-        """
-
-        # first, figure out whether we can re-use any in-flight requests
-        # (and if so, what would be left over)
-        (
-            reusable_requests,
-            state_filter_left_over,
-        ) = self._get_state_for_group_gather_inflight_requests(group, state_filter)
-
-        if state_filter_left_over != StateFilter.none():
-            # Fetch remaining state
-            remaining = await self._get_state_for_group_fire_request(
-                group, state_filter_left_over
-            )
-            assembled_state: MutableStateMap[str] = dict(remaining)
-        else:
-            assembled_state = {}
-
-        gathered = await make_deferred_yieldable(
-            defer.gatherResults(
-                (r.observe() for r in reusable_requests), consumeErrors=True
-            )
-        ).addErrback(unwrapFirstError)
-
-        # assemble our result.
-        for result_piece in gathered:
-            assembled_state.update(result_piece)
-
-        # Filter out any state that may be more than what we asked for.
-        return state_filter.filter_state(assembled_state)
-
     async def _get_state_for_groups(
         self, groups: Iterable[int], state_filter: Optional[StateFilter] = None
     ) -> Dict[int, MutableStateMap[str]]:
@@ -452,17 +269,31 @@ async def _get_state_for_groups(
         if not incomplete_groups:
             return state
 
-        async def get_from_cache(group: int, state_filter: StateFilter) -> None:
-            state[group] = await self._get_state_for_group_using_inflight_cache(
-                group, state_filter
-            )
+        cache_sequence_nm = self._state_group_cache.sequence
+        cache_sequence_m = self._state_group_members_cache.sequence
 
-        await yieldable_gather_results(
-            get_from_cache,
-            incomplete_groups,
-            state_filter,
+        # Help the cache hit ratio by expanding the filter a bit
+        db_state_filter = state_filter.return_expanded()
+
+        group_to_state_dict = await self._get_state_groups_from_groups(
+            list(incomplete_groups), state_filter=db_state_filter
         )
 
+        # Now let's update the caches
+        self._insert_into_cache(
+            group_to_state_dict,
+            db_state_filter,
+            cache_seq_num_members=cache_sequence_m,
+            cache_seq_num_non_members=cache_sequence_nm,
+        )
+
+        # And finally update the result dict, by filtering out any extra
+        # stuff we pulled out of the database.
+        for group, group_state_dict in group_to_state_dict.items():
+            # We just replace any existing entries, as we will have loaded
+            # everything we need from the database anyway.
+            state[group] = state_filter.filter_state(group_state_dict)
+
         return state
 
     def _get_state_for_groups_using_cache(
diff --git a/tests/storage/databases/test_state_store.py b/tests/storage/databases/test_state_store.py
deleted file mode 100644
index 076b6608094..00000000000
--- a/tests/storage/databases/test_state_store.py
+++ /dev/null
@@ -1,352 +0,0 @@
-# Copyright 2022 The Matrix.org Foundation C.I.C.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import typing
-from typing import Dict, List, Sequence, Tuple
-from unittest.mock import patch
-
-from twisted.internet.defer import Deferred, ensureDeferred
-from twisted.test.proto_helpers import MemoryReactor
-
-from synapse.api.constants import EventTypes
-from synapse.storage.databases.state.store import MAX_INFLIGHT_REQUESTS_PER_GROUP
-from synapse.storage.state import StateFilter
-from synapse.types import StateMap
-from synapse.util import Clock
-
-from tests.unittest import HomeserverTestCase
-
-if typing.TYPE_CHECKING:
-    from synapse.server import HomeServer
-
-# StateFilter for ALL non-m.room.member state events
-ALL_NON_MEMBERS_STATE_FILTER = StateFilter.freeze(
-    types={EventTypes.Member: set()},
-    include_others=True,
-)
-
-FAKE_STATE = {
-    (EventTypes.Member, "@alice:test"): "join",
-    (EventTypes.Member, "@bob:test"): "leave",
-    (EventTypes.Member, "@charlie:test"): "invite",
-    ("test.type", "a"): "AAA",
-    ("test.type", "b"): "BBB",
-    ("other.event.type", "state.key"): "123",
-}
-
-
-class StateGroupInflightCachingTestCase(HomeserverTestCase):
-    def prepare(
-        self, reactor: MemoryReactor, clock: Clock, homeserver: "HomeServer"
-    ) -> None:
-        self.state_storage = homeserver.get_storage().state
-        self.state_datastore = homeserver.get_datastores().state
-        # Patch out the `_get_state_groups_from_groups`.
-        # This is useful because it lets us pretend we have a slow database.
-        get_state_groups_patch = patch.object(
-            self.state_datastore,
-            "_get_state_groups_from_groups",
-            self._fake_get_state_groups_from_groups,
-        )
-        get_state_groups_patch.start()
-
-        self.addCleanup(get_state_groups_patch.stop)
-        self.get_state_group_calls: List[
-            Tuple[Tuple[int, ...], StateFilter, Deferred[Dict[int, StateMap[str]]]]
-        ] = []
-
-    def _fake_get_state_groups_from_groups(
-        self, groups: Sequence[int], state_filter: StateFilter
-    ) -> "Deferred[Dict[int, StateMap[str]]]":
-        d: Deferred[Dict[int, StateMap[str]]] = Deferred()
-        self.get_state_group_calls.append((tuple(groups), state_filter, d))
-        return d
-
-    def _complete_request_fake(
-        self,
-        groups: Tuple[int, ...],
-        state_filter: StateFilter,
-        d: "Deferred[Dict[int, StateMap[str]]]",
-    ) -> None:
-        """
-        Assemble a fake database response and complete the database request.
-        """
-
-        # Return a filtered copy of the fake state
-        d.callback({group: state_filter.filter_state(FAKE_STATE) for group in groups})
-
-    def test_duplicate_requests_deduplicated(self) -> None:
-        """
-        Tests that duplicate requests for state are deduplicated.
-
-        This test:
-        - requests some state (state group 42, 'all' state filter)
-        - requests it again, before the first request finishes
-        - checks to see that only one database query was made
-        - completes the database query
-        - checks that both requests see the same retrieved state
-        """
-        req1 = ensureDeferred(
-            self.state_datastore._get_state_for_group_using_inflight_cache(
-                42, StateFilter.all()
-            )
-        )
-        self.pump(by=0.1)
-
-        # This should have gone to the database
-        self.assertEqual(len(self.get_state_group_calls), 1)
-        self.assertFalse(req1.called)
-
-        req2 = ensureDeferred(
-            self.state_datastore._get_state_for_group_using_inflight_cache(
-                42, StateFilter.all()
-            )
-        )
-        self.pump(by=0.1)
-
-        # No more calls should have gone to the database
-        self.assertEqual(len(self.get_state_group_calls), 1)
-        self.assertFalse(req1.called)
-        self.assertFalse(req2.called)
-
-        groups, sf, d = self.get_state_group_calls[0]
-        self.assertEqual(groups, (42,))
-        self.assertEqual(sf, StateFilter.all())
-
-        # Now we can complete the request
-        self._complete_request_fake(groups, sf, d)
-
-        self.assertEqual(self.get_success(req1), FAKE_STATE)
-        self.assertEqual(self.get_success(req2), FAKE_STATE)
-
-    def test_smaller_request_deduplicated(self) -> None:
-        """
-        Tests that duplicate requests for state are deduplicated.
-
-        This test:
-        - requests some state (state group 42, 'all' state filter)
-        - requests a subset of that state, before the first request finishes
-        - checks to see that only one database query was made
-        - completes the database query
-        - checks that both requests see the correct retrieved state
-        """
-        req1 = ensureDeferred(
-            self.state_datastore._get_state_for_group_using_inflight_cache(
-                42, StateFilter.from_types((("test.type", None),))
-            )
-        )
-        self.pump(by=0.1)
-
-        # This should have gone to the database
-        self.assertEqual(len(self.get_state_group_calls), 1)
-        self.assertFalse(req1.called)
-
-        req2 = ensureDeferred(
-            self.state_datastore._get_state_for_group_using_inflight_cache(
-                42, StateFilter.from_types((("test.type", "b"),))
-            )
-        )
-        self.pump(by=0.1)
-
-        # No more calls should have gone to the database, because the second
-        # request was already in the in-flight cache!
-        self.assertEqual(len(self.get_state_group_calls), 1)
-        self.assertFalse(req1.called)
-        self.assertFalse(req2.called)
-
-        groups, sf, d = self.get_state_group_calls[0]
-        self.assertEqual(groups, (42,))
-        # The state filter is expanded internally for increased cache hit rate,
-        # so we the database sees a wider state filter than requested.
-        self.assertEqual(sf, ALL_NON_MEMBERS_STATE_FILTER)
-
-        # Now we can complete the request
-        self._complete_request_fake(groups, sf, d)
-
-        self.assertEqual(
-            self.get_success(req1),
-            {("test.type", "a"): "AAA", ("test.type", "b"): "BBB"},
-        )
-        self.assertEqual(self.get_success(req2), {("test.type", "b"): "BBB"})
-
-    def test_partially_overlapping_request_deduplicated(self) -> None:
-        """
-        Tests that partially-overlapping requests are partially deduplicated.
-
-        This test:
-        - requests a single type of wildcard state
-          (This is internally expanded to be all non-member state)
-        - requests the entire state in parallel
-        - checks to see that two database queries were made, but that the second
-          one is only for member state.
-        - completes the database queries
-        - checks that both requests have the correct result.
-        """
-
-        req1 = ensureDeferred(
-            self.state_datastore._get_state_for_group_using_inflight_cache(
-                42, StateFilter.from_types((("test.type", None),))
-            )
-        )
-        self.pump(by=0.1)
-
-        # This should have gone to the database
-        self.assertEqual(len(self.get_state_group_calls), 1)
-        self.assertFalse(req1.called)
-
-        req2 = ensureDeferred(
-            self.state_datastore._get_state_for_group_using_inflight_cache(
-                42, StateFilter.all()
-            )
-        )
-        self.pump(by=0.1)
-
-        # Because it only partially overlaps, this also went to the database
-        self.assertEqual(len(self.get_state_group_calls), 2)
-        self.assertFalse(req1.called)
-        self.assertFalse(req2.called)
-
-        # First request:
-        groups, sf, d = self.get_state_group_calls[0]
-        self.assertEqual(groups, (42,))
-        # The state filter is expanded internally for increased cache hit rate,
-        # so we the database sees a wider state filter than requested.
-        self.assertEqual(sf, ALL_NON_MEMBERS_STATE_FILTER)
-        self._complete_request_fake(groups, sf, d)
-
-        # Second request:
-        groups, sf, d = self.get_state_group_calls[1]
-        self.assertEqual(groups, (42,))
-        # The state filter is narrowed to only request membership state, because
-        # the remainder of the state is already being queried in the first request!
-        self.assertEqual(
-            sf, StateFilter.freeze({EventTypes.Member: None}, include_others=False)
-        )
-        self._complete_request_fake(groups, sf, d)
-
-        # Check the results are correct
-        self.assertEqual(
-            self.get_success(req1),
-            {("test.type", "a"): "AAA", ("test.type", "b"): "BBB"},
-        )
-        self.assertEqual(self.get_success(req2), FAKE_STATE)
-
-    def test_in_flight_requests_stop_being_in_flight(self) -> None:
-        """
-        Tests that in-flight request deduplication doesn't somehow 'hold on'
-        to completed requests: once they're done, they're taken out of the
-        in-flight cache.
-        """
-        req1 = ensureDeferred(
-            self.state_datastore._get_state_for_group_using_inflight_cache(
-                42, StateFilter.all()
-            )
-        )
-        self.pump(by=0.1)
-
-        # This should have gone to the database
-        self.assertEqual(len(self.get_state_group_calls), 1)
-        self.assertFalse(req1.called)
-
-        # Complete the request right away.
-        self._complete_request_fake(*self.get_state_group_calls[0])
-        self.assertTrue(req1.called)
-
-        # Send off another request
-        req2 = ensureDeferred(
-            self.state_datastore._get_state_for_group_using_inflight_cache(
-                42, StateFilter.all()
-            )
-        )
-        self.pump(by=0.1)
-
-        # It should have gone to the database again, because the previous request
-        # isn't in-flight and therefore isn't available for deduplication.
-        self.assertEqual(len(self.get_state_group_calls), 2)
-        self.assertFalse(req2.called)
-
-        # Complete the request right away.
-        self._complete_request_fake(*self.get_state_group_calls[1])
-        self.assertTrue(req2.called)
-        groups, sf, d = self.get_state_group_calls[0]
-
-        self.assertEqual(self.get_success(req1), FAKE_STATE)
-        self.assertEqual(self.get_success(req2), FAKE_STATE)
-
-    def test_inflight_requests_capped(self) -> None:
-        """
-        Tests that the number of in-flight requests is capped to 5.
-
-        - requests several pieces of state separately
-          (5 to hit the limit, 1 to 'shunt out', another that comes after the
-          group has been 'shunted out')
-        - checks to see that the torrent of requests is shunted out by
-          rewriting one of the filters as the 'all' state filter
-        - requests after that one do not cause any additional queries
-        """
-        # 5 at the time of writing.
-        CAP_COUNT = MAX_INFLIGHT_REQUESTS_PER_GROUP
-
-        reqs = []
-
-        # Request 7 different keys (1 to 7) of the `some.state` type.
-        for req_id in range(CAP_COUNT + 2):
-            reqs.append(
-                ensureDeferred(
-                    self.state_datastore._get_state_for_group_using_inflight_cache(
-                        42,
-                        StateFilter.freeze(
-                            {"some.state": {str(req_id + 1)}}, include_others=False
-                        ),
-                    )
-                )
-            )
-        self.pump(by=0.1)
-
-        # There should only be 6 calls to the database, not 7.
-        self.assertEqual(len(self.get_state_group_calls), CAP_COUNT + 1)
-
-        # Assert that the first 5 are exact requests for the individual pieces
-        # wanted
-        for req_id in range(CAP_COUNT):
-            groups, sf, d = self.get_state_group_calls[req_id]
-            self.assertEqual(
-                sf,
-                StateFilter.freeze(
-                    {"some.state": {str(req_id + 1)}}, include_others=False
-                ),
-            )
-
-        # The 6th request should be the 'all' state filter
-        groups, sf, d = self.get_state_group_calls[CAP_COUNT]
-        self.assertEqual(sf, StateFilter.all())
-
-        # Complete the queries and check which requests complete as a result
-        for req_id in range(CAP_COUNT):
-            # This request should not have been completed yet
-            self.assertFalse(reqs[req_id].called)
-
-            groups, sf, d = self.get_state_group_calls[req_id]
-            self._complete_request_fake(groups, sf, d)
-
-            # This should have only completed this one request
-            self.assertTrue(reqs[req_id].called)
-
-        # Now complete the final query; the last 2 requests should complete
-        # as a result
-        self.assertFalse(reqs[CAP_COUNT].called)
-        self.assertFalse(reqs[CAP_COUNT + 1].called)
-        groups, sf, d = self.get_state_group_calls[CAP_COUNT]
-        self._complete_request_fake(groups, sf, d)
-        self.assertTrue(reqs[CAP_COUNT].called)
-        self.assertTrue(reqs[CAP_COUNT + 1].called)
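
The code removed by this patch deduplicated concurrent state-group lookups by parking an ObservableDeferred in a per-group dict of in-flight requests, so later callers could attach to a query that was already running. A rough, self-contained sketch of that pattern, using plain asyncio rather than the Twisted/ObservableDeferred machinery above; the class and names below are illustrative only, not Synapse APIs:

    import asyncio
    from typing import Awaitable, Callable, Dict, Hashable


    class InflightDeduplicator:
        """Share one in-flight fetch between all callers asking for the same key."""

        def __init__(self) -> None:
            # key -> the task that is already fetching it
            self._inflight: Dict[Hashable, "asyncio.Task[dict]"] = {}

        async def get(self, key: Hashable, fetch: Callable[[], Awaitable[dict]]) -> dict:
            task = self._inflight.get(key)
            if task is None:
                # Nothing in flight for this key: start the fetch and record it.
                task = asyncio.ensure_future(fetch())
                self._inflight[key] = task
                # Once done it stops being "in flight", so later callers query
                # again rather than reusing a potentially stale result.
                task.add_done_callback(lambda _done: self._inflight.pop(key, None))
            # All concurrent callers await the same task: only one query runs.
            return await task


    async def main() -> None:
        dedup = InflightDeduplicator()

        async def slow_db_fetch() -> dict:
            await asyncio.sleep(0.1)
            return {("m.room.member", "@alice:test"): "join"}

        # Two concurrent requests for state group 42 share a single fetch.
        a, b = await asyncio.gather(
            dedup.get(42, slow_db_fetch), dedup.get(42, slow_db_fetch)
        )
        assert a is b


    asyncio.run(main())

The replacement hunk earlier in this patch instead batches all incomplete groups into a single _get_state_groups_from_groups call, trading the per-group deduplication machinery for one wider database query.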

From 36383d48f3efa01039898dac294ecce0aa2d172e Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 4 Apr 2022 19:07:49 +0100
Subject: [PATCH 165/278] Prefill the device_list_stream_cache

---
 synapse/replication/slave/storage/devices.py | 12 +++++++++++-
 synapse/storage/databases/main/__init__.py   | 12 +++++++++++-
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/synapse/replication/slave/storage/devices.py b/synapse/replication/slave/storage/devices.py
index 0ffd34f1dad..a0dd9d2b893 100644
--- a/synapse/replication/slave/storage/devices.py
+++ b/synapse/replication/slave/storage/devices.py
@@ -47,8 +47,18 @@ def __init__(
             ],
         )
         device_list_max = self._device_list_id_gen.get_current_token()
+        device_list_prefill, min_device_list_id = self.db_pool.get_cache_dict(
+            db_conn,
+            "device_lists_stream",
+            entity_column="user_id",
+            stream_column="stream_id",
+            max_value=device_list_max,
+            limit=1000,
+        )
         self._device_list_stream_cache = StreamChangeCache(
-            "DeviceListStreamChangeCache", device_list_max
+            "DeviceListStreamChangeCache",
+            min_device_list_id,
+            prefilled_cache=device_list_prefill,
         )
         self._user_signature_stream_cache = StreamChangeCache(
             "UserSignatureStreamChangeCache", device_list_max
diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py
index f024761ba7b..68abf6783fe 100644
--- a/synapse/storage/databases/main/__init__.py
+++ b/synapse/storage/databases/main/__init__.py
@@ -183,8 +183,18 @@ def __init__(
         super().__init__(database, db_conn, hs)
 
         device_list_max = self._device_list_id_gen.get_current_token()
+        device_list_prefill, min_device_list_id = self.db_pool.get_cache_dict(
+            db_conn,
+            "device_lists_stream",
+            entity_column="user_id",
+            stream_column="stream_id",
+            max_value=device_list_max,
+            limit=1000,
+        )
         self._device_list_stream_cache = StreamChangeCache(
-            "DeviceListStreamChangeCache", device_list_max
+            "DeviceListStreamChangeCache",
+            min_device_list_id,
+            prefilled_cache=device_list_prefill,
         )
         self._user_signature_stream_cache = StreamChangeCache(
             "UserSignatureStreamChangeCache", device_list_max

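Here get_cache_dict reads the most recent rows of device_lists_stream (up to the limit) and, roughly, returns a mapping of user_id to their latest stream_id plus the earliest stream position that mapping covers; feeding both into StreamChangeCache lets a freshly restarted worker answer "has this user's device list changed since X?" from memory instead of hitting the database per user. A simplified stand-in for that behaviour (the class below is illustrative, not Synapse's StreamChangeCache):

    from typing import Dict, Optional


    class SimpleStreamChangeCache:
        """Simplified stand-in for Synapse's StreamChangeCache.

        Records the latest stream position at which each entity changed.  For
        positions older than the earliest one we know about, we must assume the
        entity *may* have changed and fall back to the database.
        """

        def __init__(
            self, earliest_known_pos: int, prefilled: Optional[Dict[str, int]] = None
        ) -> None:
            self._earliest_known_pos = earliest_known_pos
            self._entity_to_pos: Dict[str, int] = dict(prefilled or {})

        def has_entity_changed(self, entity: str, since_pos: int) -> bool:
            if since_pos < self._earliest_known_pos:
                # The cache does not reach back that far: answer "maybe".
                return True
            latest = self._entity_to_pos.get(entity)
            return latest is not None and latest > since_pos


    # Roughly what the prefill buys us after a restart: most "changed since?"
    # questions are answered from memory instead of per-user database queries.
    prefill = {"@alice:example.org": 105, "@bob:example.org": 98}
    cache = SimpleStreamChangeCache(earliest_known_pos=90, prefilled=prefill)
    assert cache.has_entity_changed("@alice:example.org", since_pos=100)
    assert not cache.has_entity_changed("@bob:example.org", since_pos=100)
    assert cache.has_entity_changed("@carol:example.org", since_pos=50)
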
From f986db2adb6f1e97a51299fb5af6abfa49e24f19 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 4 Apr 2022 19:08:42 +0100
Subject: [PATCH 166/278] Newsfile

---
 changelog.d/12367.feature | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/12367.feature

diff --git a/changelog.d/12367.feature b/changelog.d/12367.feature
new file mode 100644
index 00000000000..89403ceeef3
--- /dev/null
+++ b/changelog.d/12367.feature
@@ -0,0 +1 @@
+Reduce overhead of restarting synchrotrons

From ea2d01366ce2a68c11b0ed8f21d90a6596463215 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 4 Apr 2022 19:13:58 +0100
Subject: [PATCH 167/278] Newsfile

---
 changelog.d/12367.feature | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/changelog.d/12367.feature b/changelog.d/12367.feature
index 89403ceeef3..34bb60e9660 100644
--- a/changelog.d/12367.feature
+++ b/changelog.d/12367.feature
@@ -1 +1 @@
-Reduce overhead of restarting synchrotrons
+Reduce overhead of restarting synchrotrons.

From 771d2041cd02e4f16bd21d0f8580806911648926 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 16 May 2022 15:43:00 +0100
Subject: [PATCH 168/278] Fix query performance for /sync

Broke in #12729.
---
 synapse/storage/databases/main/stream.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py
index 3c3137fe64c..0373af86c82 100644
--- a/synapse/storage/databases/main/stream.py
+++ b/synapse/storage/databases/main/stream.py
@@ -750,7 +750,7 @@ def _f(txn: LoggingTransaction) -> Optional[Tuple[int, int, str]]:
                 WHERE room_id = ?
                     AND stream_ordering <= ?
                     AND NOT outlier
-                    AND rejections.reason IS NULL
+                    AND rejections.event_id IS NULL
                 ORDER BY stream_ordering DESC
                 LIMIT 1
             """

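The /sync query LEFT JOINs against rejections and keeps only events with no matching row. Filtering on the join key (rejections.event_id IS NULL) is the standard anti-join predicate: it is true exactly when no rejection row exists, whereas a payload column such as reason could in principle be NULL on a matching row, and the key-column form is what lets the planner recognise the anti-join, which is presumably the performance problem this hotfix addresses. A tiny self-contained illustration of the pattern, with made-up table shapes, using sqlite3:

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.executescript(
        """
        CREATE TABLE events (event_id TEXT PRIMARY KEY, room_id TEXT, stream_ordering INTEGER);
        CREATE TABLE rejections (event_id TEXT PRIMARY KEY, reason TEXT);
        INSERT INTO events VALUES ('$ok', '!room:test', 1), ('$rejected', '!room:test', 2);
        INSERT INTO rejections VALUES ('$rejected', 'spam');
        """
    )

    # Anti-join: keep only events that have no row in `rejections`.  Testing the
    # join key for NULL cannot be confused by a NULL payload column on a match.
    rows = conn.execute(
        """
        SELECT e.event_id
        FROM events AS e
        LEFT JOIN rejections AS r ON r.event_id = e.event_id
        WHERE e.room_id = ? AND r.event_id IS NULL
        ORDER BY e.stream_ordering DESC
        """,
        ("!room:test",),
    ).fetchall()
    assert rows == [("$ok",)]
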
From 4cee2a6330c572117b56246bae8571ccc5445ee9 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Wed, 18 May 2022 12:23:37 +0100
Subject: [PATCH 169/278] Fixup changelog

---
 CHANGES.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGES.md b/CHANGES.md
index e10ac0314ab..9bb2357572c 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,7 +1,7 @@
 Synapse 1.59.1 (2022-05-18)
 ===========================
 
-This release fixes a long-standing issue which could prevent Synapse's user directory for updating properly.
+This release fixes a long-standing issue which could prevent Synapse's user directory from updating properly.
 
 Bugfixes
 ----------------

From 7ba03360109e3868ddd79df825d76cdd7120720d Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 23 Aug 2022 11:31:19 +0100
Subject: [PATCH 170/278] Remove the hotfix-specific hack for auth on
 room_members

This reverts fae708c0e8c35930f1172322b7c0e9f0b1b3f9a4.

We believe this to be unnecessary---other Synapse deployments do not
have this patch, and we are not aware of bridging problems as a result.

Related:

- https://github.com/matrix-org/matrix-appservice-irc/issues/506
- https://github.com/matrix-org/synapse/issues/4826
---
 synapse/handlers/message.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 9aea02b6b9a..acd3de06f6f 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -339,9 +339,7 @@ async def get_joined_members(self, requester: Requester, room_id: str) -> dict:
         # This can either be because the AS user is in the room or because there
         # is a user in the room that the AS is "interested in"
         if (
-            False  # See https://github.com/matrix-org/matrix-appservice-irc/issues/506
-                   # and https://github.com/matrix-org/synapse/issues/4826#issuecomment-1028105662
-            and requester.app_service
+            requester.app_service
             and requester.user.to_string() not in users_with_profile
         ):
             for uid in users_with_profile:

From 220af1df54c3b4e5f1d7ec8fb62b375c99a2bbe2 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Wed, 2 Nov 2022 12:28:24 +0000
Subject: [PATCH 171/278] Revert "Switch search SQL to triple-quote strings.
 (#14311)"

This reverts commit 81815e0561eea91dbf0c29731589fac2e6f98a40.
---
 synapse/storage/databases/main/search.py | 188 +++++++++++------------
 1 file changed, 89 insertions(+), 99 deletions(-)

diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py
index e9588d17551..594b935614f 100644
--- a/synapse/storage/databases/main/search.py
+++ b/synapse/storage/databases/main/search.py
@@ -80,11 +80,11 @@ def store_search_entries_txn(
         if not self.hs.config.server.enable_search:
             return
         if isinstance(self.database_engine, PostgresEngine):
-            sql = """
-            INSERT INTO event_search
-            (event_id, room_id, key, vector, stream_ordering, origin_server_ts)
-            VALUES (?,?,?,to_tsvector('english', ?),?,?)
-            """
+            sql = (
+                "INSERT INTO event_search"
+                " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)"
+                " VALUES (?,?,?,to_tsvector('english', ?),?,?)"
+            )
 
             args1 = (
                 (
@@ -101,20 +101,20 @@ def store_search_entries_txn(
             txn.execute_batch(sql, args1)
 
         elif isinstance(self.database_engine, Sqlite3Engine):
-            self.db_pool.simple_insert_many_txn(
-                txn,
-                table="event_search",
-                keys=("event_id", "room_id", "key", "value"),
-                values=(
-                    (
-                        entry.event_id,
-                        entry.room_id,
-                        entry.key,
-                        _clean_value_for_search(entry.value),
-                    )
-                    for entry in entries
-                ),
+            sql = (
+                "INSERT INTO event_search (event_id, room_id, key, value)"
+                " VALUES (?,?,?,?)"
+            )
+            args2 = (
+                (
+                    entry.event_id,
+                    entry.room_id,
+                    entry.key,
+                    _clean_value_for_search(entry.value),
+                )
+                for entry in entries
             )
+            txn.execute_batch(sql, args2)
 
         else:
             # This should be unreachable.
@@ -162,17 +162,15 @@ async def _background_reindex_search(
         TYPES = ["m.room.name", "m.room.message", "m.room.topic"]
 
         def reindex_search_txn(txn: LoggingTransaction) -> int:
-            sql = """
-            SELECT stream_ordering, event_id, room_id, type, json, origin_server_ts
-            FROM events
-            JOIN event_json USING (room_id, event_id)
-            WHERE ? <= stream_ordering AND stream_ordering < ?
-            AND (%s)
-            ORDER BY stream_ordering DESC
-            LIMIT ?
-            """ % (
-                " OR ".join("type = '%s'" % (t,) for t in TYPES),
-            )
+            sql = (
+                "SELECT stream_ordering, event_id, room_id, type, json, "
+                " origin_server_ts FROM events"
+                " JOIN event_json USING (room_id, event_id)"
+                " WHERE ? <= stream_ordering AND stream_ordering < ?"
+                " AND (%s)"
+                " ORDER BY stream_ordering DESC"
+                " LIMIT ?"
+            ) % (" OR ".join("type = '%s'" % (t,) for t in TYPES),)
 
             txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size))
 
@@ -286,10 +284,8 @@ def create_index(conn: LoggingDatabaseConnection) -> None:
 
                 try:
                     c.execute(
-                        """
-                        CREATE INDEX CONCURRENTLY event_search_fts_idx
-                        ON event_search USING GIN (vector)
-                        """
+                        "CREATE INDEX CONCURRENTLY event_search_fts_idx"
+                        " ON event_search USING GIN (vector)"
                     )
                 except psycopg2.ProgrammingError as e:
                     logger.warning(
@@ -327,16 +323,12 @@ def create_index(conn: LoggingDatabaseConnection) -> None:
                 # We create with NULLS FIRST so that when we search *backwards*
                 # we get the ones with non null origin_server_ts *first*
                 c.execute(
-                    """
-                    CREATE INDEX CONCURRENTLY event_search_room_order
-                    ON event_search(room_id, origin_server_ts NULLS FIRST, stream_ordering NULLS FIRST)
-                    """
+                    "CREATE INDEX CONCURRENTLY event_search_room_order ON event_search("
+                    "room_id, origin_server_ts NULLS FIRST, stream_ordering NULLS FIRST)"
                 )
                 c.execute(
-                    """
-                    CREATE INDEX CONCURRENTLY event_search_order
-                    ON event_search(origin_server_ts NULLS FIRST, stream_ordering NULLS FIRST)
-                    """
+                    "CREATE INDEX CONCURRENTLY event_search_order ON event_search("
+                    "origin_server_ts NULLS FIRST, stream_ordering NULLS FIRST)"
                 )
                 conn.set_session(autocommit=False)
 
@@ -353,14 +345,14 @@ def create_index(conn: LoggingDatabaseConnection) -> None:
             )
 
         def reindex_search_txn(txn: LoggingTransaction) -> Tuple[int, bool]:
-            sql = """
-            UPDATE event_search AS es
-            SET stream_ordering = e.stream_ordering, origin_server_ts = e.origin_server_ts
-            FROM events AS e
-            WHERE e.event_id = es.event_id
-            AND ? <= e.stream_ordering AND e.stream_ordering < ?
-            RETURNING es.stream_ordering
-            """
+            sql = (
+                "UPDATE event_search AS es SET stream_ordering = e.stream_ordering,"
+                " origin_server_ts = e.origin_server_ts"
+                " FROM events AS e"
+                " WHERE e.event_id = es.event_id"
+                " AND ? <= e.stream_ordering AND e.stream_ordering < ?"
+                " RETURNING es.stream_ordering"
+            )
 
             min_stream_id = max_stream_id - batch_size
             txn.execute(sql, (min_stream_id, max_stream_id))
@@ -464,33 +456,33 @@ async def search_msgs(
         if isinstance(self.database_engine, PostgresEngine):
             search_query = search_term
             tsquery_func = self.database_engine.tsquery_func
-            sql = f"""
-            SELECT ts_rank_cd(vector, {tsquery_func}('english', ?)) AS rank,
-            room_id, event_id
-            FROM event_search
-            WHERE vector @@  {tsquery_func}('english', ?)
-            """
+            sql = (
+                f"SELECT ts_rank_cd(vector, {tsquery_func}('english', ?)) AS rank,"
+                " room_id, event_id"
+                " FROM event_search"
+                f" WHERE vector @@  {tsquery_func}('english', ?)"
+            )
             args = [search_query, search_query] + args
 
-            count_sql = f"""
-            SELECT room_id, count(*) as count FROM event_search
-            WHERE vector @@ {tsquery_func}('english', ?)
-            """
+            count_sql = (
+                "SELECT room_id, count(*) as count FROM event_search"
+                f" WHERE vector @@ {tsquery_func}('english', ?)"
+            )
             count_args = [search_query] + count_args
         elif isinstance(self.database_engine, Sqlite3Engine):
             search_query = _parse_query_for_sqlite(search_term)
 
-            sql = """
-            SELECT rank(matchinfo(event_search)) as rank, room_id, event_id
-            FROM event_search
-            WHERE value MATCH ?
-            """
+            sql = (
+                "SELECT rank(matchinfo(event_search)) as rank, room_id, event_id"
+                " FROM event_search"
+                " WHERE value MATCH ?"
+            )
             args = [search_query] + args
 
-            count_sql = """
-            SELECT room_id, count(*) as count FROM event_search
-            WHERE value MATCH ?
-            """
+            count_sql = (
+                "SELECT room_id, count(*) as count FROM event_search"
+                " WHERE value MATCH ?"
+            )
             count_args = [search_query] + count_args
         else:
             # This should be unreachable.
@@ -596,27 +588,26 @@ async def search_rooms(
                 raise SynapseError(400, "Invalid pagination token")
 
             clauses.append(
-                """
-                (origin_server_ts < ? OR (origin_server_ts = ? AND stream_ordering < ?))
-                """
+                "(origin_server_ts < ?"
+                " OR (origin_server_ts = ? AND stream_ordering < ?))"
             )
             args.extend([origin_server_ts, origin_server_ts, stream])
 
         if isinstance(self.database_engine, PostgresEngine):
             search_query = search_term
             tsquery_func = self.database_engine.tsquery_func
-            sql = f"""
-            SELECT ts_rank_cd(vector, {tsquery_func}('english', ?)) as rank,
-            origin_server_ts, stream_ordering, room_id, event_id
-            FROM event_search
-            WHERE vector @@ {tsquery_func}('english', ?) AND
-            """
+            sql = (
+                f"SELECT ts_rank_cd(vector, {tsquery_func}('english', ?)) as rank,"
+                " origin_server_ts, stream_ordering, room_id, event_id"
+                " FROM event_search"
+                f" WHERE vector @@ {tsquery_func}('english', ?) AND "
+            )
             args = [search_query, search_query] + args
 
-            count_sql = f"""
-            SELECT room_id, count(*) as count FROM event_search
-            WHERE vector @@ {tsquery_func}('english', ?) AND
-            """
+            count_sql = (
+                "SELECT room_id, count(*) as count FROM event_search"
+                f" WHERE vector @@ {tsquery_func}('english', ?) AND "
+            )
             count_args = [search_query] + count_args
         elif isinstance(self.database_engine, Sqlite3Engine):
 
@@ -628,24 +619,23 @@ async def search_rooms(
             # in the events table to get the topological ordering. We need
             # to use the indexes in this order because sqlite refuses to
             # MATCH unless it uses the full text search index
-            sql = """
-            SELECT
-                rank(matchinfo) as rank, room_id, event_id, origin_server_ts, stream_ordering
-            FROM (
-                SELECT key, event_id, matchinfo(event_search) as matchinfo
-                FROM event_search
-                WHERE value MATCH ?
+            sql = (
+                "SELECT rank(matchinfo) as rank, room_id, event_id,"
+                " origin_server_ts, stream_ordering"
+                " FROM (SELECT key, event_id, matchinfo(event_search) as matchinfo"
+                " FROM event_search"
+                " WHERE value MATCH ?"
+                " )"
+                " CROSS JOIN events USING (event_id)"
+                " WHERE "
             )
-            CROSS JOIN events USING (event_id)
-            WHERE
-            """
             search_query = _parse_query_for_sqlite(search_term)
             args = [search_query] + args
 
-            count_sql = """
-            SELECT room_id, count(*) as count FROM event_search
-            WHERE value MATCH ? AND
-            """
+            count_sql = (
+                "SELECT room_id, count(*) as count FROM event_search"
+                " WHERE value MATCH ? AND "
+            )
             count_args = [search_query] + count_args
         else:
             # This should be unreachable.
@@ -657,10 +647,10 @@ async def search_rooms(
         # We add an arbitrary limit here to ensure we don't try to pull the
         # entire table from the database.
         if isinstance(self.database_engine, PostgresEngine):
-            sql += """
-            ORDER BY origin_server_ts DESC NULLS LAST, stream_ordering DESC NULLS LAST
-            LIMIT ?
-            """
+            sql += (
+                " ORDER BY origin_server_ts DESC NULLS LAST,"
+                " stream_ordering DESC NULLS LAST LIMIT ?"
+            )
         elif isinstance(self.database_engine, Sqlite3Engine):
             sql += " ORDER BY origin_server_ts DESC, stream_ordering DESC LIMIT ?"
         else:

From 7f77f1386c94c753755b79d7d7025e958b228282 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Wed, 2 Nov 2022 12:28:47 +0000
Subject: [PATCH 172/278] Revert "Fix tests for change in PostgreSQL 14
 behavior change. (#14310)"

This reverts commit 67583281e3f8ea923eedbc56a4c85c7ba75d1582.
---
 synapse/storage/databases/main/search.py |  5 +++--
 tests/storage/test_room_search.py        | 16 ++++------------
 2 files changed, 7 insertions(+), 14 deletions(-)

diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py
index 594b935614f..a89fc54c2cb 100644
--- a/synapse/storage/databases/main/search.py
+++ b/synapse/storage/databases/main/search.py
@@ -824,8 +824,9 @@ def _tokenize_query(query: str) -> TokenList:
     in_phrase = False
     parts = deque(query.split('"'))
     for i, part in enumerate(parts):
-        # The contents inside double quotes is treated as a phrase.
-        in_phrase = bool(i % 2)
+        # The contents inside double quotes is treated as a phrase, a trailing
+        # double quote is not implied.
+        in_phrase = bool(i % 2) and i != (len(parts) - 1)
 
         # Pull out the individual words, discarding any non-word characters.
         words = deque(re.findall(r"([\w\-]+)", part, re.UNICODE))
diff --git a/tests/storage/test_room_search.py b/tests/storage/test_room_search.py
index 868b5bee848..9ddc19900af 100644
--- a/tests/storage/test_room_search.py
+++ b/tests/storage/test_room_search.py
@@ -239,6 +239,7 @@ class MessageSearchTest(HomeserverTestCase):
         ("fox -nope", (True, False)),
         ("fox -brown", (False, True)),
         ('"fox" quick', True),
+        ('"fox quick', True),
         ('"quick brown', True),
         ('" quick "', True),
         ('" nope"', False),
@@ -268,15 +269,6 @@ def prepare(
         response = self.helper.send(self.room_id, self.PHRASE, tok=self.access_token)
         self.assertIn("event_id", response)
 
-        # The behaviour of a missing trailing double quote changed in PostgreSQL 14
-        # from ignoring the initial double quote to treating it as a phrase.
-        main_store = homeserver.get_datastores().main
-        found = False
-        if isinstance(main_store.database_engine, PostgresEngine):
-            assert main_store.database_engine._version is not None
-            found = main_store.database_engine._version < 140000
-        self.COMMON_CASES.append(('"fox quick', (found, True)))
-
     def test_tokenize_query(self) -> None:
         """Test the custom logic to tokenize a user's query."""
         cases = (
@@ -288,9 +280,9 @@ def test_tokenize_query(self) -> None:
             ("fox -brown", ["fox", SearchToken.Not, "brown"]),
             ("- fox", [SearchToken.Not, "fox"]),
             ('"fox" quick', [Phrase(["fox"]), SearchToken.And, "quick"]),
-            # No trailing double quote.
-            ('"fox quick', [Phrase(["fox", "quick"])]),
-            ('"-fox quick', [Phrase(["-fox", "quick"])]),
+            # No trailing double quoe.
+            ('"fox quick', ["fox", SearchToken.And, "quick"]),
+            ('"-fox quick', [SearchToken.Not, "fox", SearchToken.And, "quick"]),
             ('" quick "', [Phrase(["quick"])]),
             (
                 'q"uick brow"n',

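The behaviour restored here turns on how _tokenize_query splits the query: query.split('"') puts quoted material at odd indices, and the extra i != (len(parts) - 1) check stops an unterminated opening quote from turning the rest of the query into a phrase. A quick standalone illustration of the two variants of the in_phrase test (not the Synapse function itself; names are illustrative):

    from typing import List, Tuple


    def phrase_chunks(query: str, imply_trailing_quote: bool) -> List[Tuple[bool, str]]:
        """Split on '"' and report, per non-empty chunk, whether it is inside quotes."""
        parts = query.split('"')
        out: List[Tuple[bool, str]] = []
        for i, part in enumerate(parts):
            in_phrase = bool(i % 2)
            if not imply_trailing_quote:
                # Restored behaviour: an unterminated opening quote does not start a phrase.
                in_phrase = in_phrase and i != (len(parts) - 1)
            if part.strip():
                out.append((in_phrase, part.strip()))
        return out


    # '"fox quick' opens a quote but never closes it.
    print(phrase_chunks('"fox quick', imply_trailing_quote=True))   # [(True, 'fox quick')]
    print(phrase_chunks('"fox quick', imply_trailing_quote=False))  # [(False, 'fox quick')]
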
From 7e0dd52782806592df61a1a517660b694515dc27 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Wed, 2 Nov 2022 12:29:34 +0000
Subject: [PATCH 173/278] Revert "Unified search query syntax using the
 full-text search capabilities of the underlying DB. (#11635)"

This reverts commit d902181de98399d90c46c4e4e2cf631064757941.
---
 synapse/storage/databases/main/search.py      | 197 +++-------------
 synapse/storage/engines/postgres.py           |  16 --
 .../73/10_update_sqlite_fts4_tokenizer.py     |  62 -----
 tests/storage/test_room_search.py             | 213 ------------------
 4 files changed, 35 insertions(+), 453 deletions(-)
 delete mode 100644 synapse/storage/schema/main/delta/73/10_update_sqlite_fts4_tokenizer.py

diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py
index a89fc54c2cb..1b79acf9555 100644
--- a/synapse/storage/databases/main/search.py
+++ b/synapse/storage/databases/main/search.py
@@ -11,22 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import enum
+
 import logging
 import re
-from collections import deque
-from dataclasses import dataclass
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Collection,
-    Iterable,
-    List,
-    Optional,
-    Set,
-    Tuple,
-    Union,
-)
+from typing import TYPE_CHECKING, Any, Collection, Iterable, List, Optional, Set, Tuple
 
 import attr
 
@@ -39,7 +27,7 @@
     LoggingTransaction,
 )
 from synapse.storage.databases.main.events_worker import EventRedactBehaviour
-from synapse.storage.engines import PostgresEngine, Sqlite3Engine
+from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine
 from synapse.types import JsonDict
 
 if TYPE_CHECKING:
@@ -433,6 +421,8 @@ async def search_msgs(
         """
         clauses = []
 
+        search_query = _parse_query(self.database_engine, search_term)
+
         args: List[Any] = []
 
         # Make sure we don't explode because the person is in too many rooms.
@@ -454,24 +444,20 @@ async def search_msgs(
         count_clauses = clauses
 
         if isinstance(self.database_engine, PostgresEngine):
-            search_query = search_term
-            tsquery_func = self.database_engine.tsquery_func
             sql = (
-                f"SELECT ts_rank_cd(vector, {tsquery_func}('english', ?)) AS rank,"
+                "SELECT ts_rank_cd(vector, to_tsquery('english', ?)) AS rank,"
                 " room_id, event_id"
                 " FROM event_search"
-                f" WHERE vector @@  {tsquery_func}('english', ?)"
+                " WHERE vector @@ to_tsquery('english', ?)"
             )
             args = [search_query, search_query] + args
 
             count_sql = (
                 "SELECT room_id, count(*) as count FROM event_search"
-                f" WHERE vector @@ {tsquery_func}('english', ?)"
+                " WHERE vector @@ to_tsquery('english', ?)"
             )
             count_args = [search_query] + count_args
         elif isinstance(self.database_engine, Sqlite3Engine):
-            search_query = _parse_query_for_sqlite(search_term)
-
             sql = (
                 "SELECT rank(matchinfo(event_search)) as rank, room_id, event_id"
                 " FROM event_search"
@@ -483,7 +469,7 @@ async def search_msgs(
                 "SELECT room_id, count(*) as count FROM event_search"
                 " WHERE value MATCH ?"
             )
-            count_args = [search_query] + count_args
+            count_args = [search_term] + count_args
         else:
             # This should be unreachable.
             raise Exception("Unrecognized database engine")
@@ -515,9 +501,7 @@ async def search_msgs(
 
         highlights = None
         if isinstance(self.database_engine, PostgresEngine):
-            highlights = await self._find_highlights_in_postgres(
-                search_query, events, tsquery_func
-            )
+            highlights = await self._find_highlights_in_postgres(search_query, events)
 
         count_sql += " GROUP BY room_id"
 
@@ -526,6 +510,7 @@ async def search_msgs(
         )
 
         count = sum(row["count"] for row in count_results if row["room_id"] in room_ids)
+
         return {
             "results": [
                 {"event": event_map[r["event_id"]], "rank": r["rank"]}
@@ -557,6 +542,9 @@ async def search_rooms(
             Each match as a dictionary.
         """
         clauses = []
+
+        search_query = _parse_query(self.database_engine, search_term)
+
         args: List[Any] = []
 
         # Make sure we don't explode because the person is in too many rooms.
@@ -594,23 +582,20 @@ async def search_rooms(
             args.extend([origin_server_ts, origin_server_ts, stream])
 
         if isinstance(self.database_engine, PostgresEngine):
-            search_query = search_term
-            tsquery_func = self.database_engine.tsquery_func
             sql = (
-                f"SELECT ts_rank_cd(vector, {tsquery_func}('english', ?)) as rank,"
+                "SELECT ts_rank_cd(vector, to_tsquery('english', ?)) as rank,"
                 " origin_server_ts, stream_ordering, room_id, event_id"
                 " FROM event_search"
-                f" WHERE vector @@ {tsquery_func}('english', ?) AND "
+                " WHERE vector @@ to_tsquery('english', ?) AND "
             )
             args = [search_query, search_query] + args
 
             count_sql = (
                 "SELECT room_id, count(*) as count FROM event_search"
-                f" WHERE vector @@ {tsquery_func}('english', ?) AND "
+                " WHERE vector @@ to_tsquery('english', ?) AND "
             )
             count_args = [search_query] + count_args
         elif isinstance(self.database_engine, Sqlite3Engine):
-
             # We use CROSS JOIN here to ensure we use the right indexes.
             # https://sqlite.org/optoverview.html#crossjoin
             #
@@ -629,14 +614,13 @@ async def search_rooms(
                 " CROSS JOIN events USING (event_id)"
                 " WHERE "
             )
-            search_query = _parse_query_for_sqlite(search_term)
             args = [search_query] + args
 
             count_sql = (
                 "SELECT room_id, count(*) as count FROM event_search"
                 " WHERE value MATCH ? AND "
             )
-            count_args = [search_query] + count_args
+            count_args = [search_term] + count_args
         else:
             # This should be unreachable.
             raise Exception("Unrecognized database engine")
@@ -676,9 +660,7 @@ async def search_rooms(
 
         highlights = None
         if isinstance(self.database_engine, PostgresEngine):
-            highlights = await self._find_highlights_in_postgres(
-                search_query, events, tsquery_func
-            )
+            highlights = await self._find_highlights_in_postgres(search_query, events)
 
         count_sql += " GROUP BY room_id"
 
@@ -704,7 +686,7 @@ async def search_rooms(
         }
 
     async def _find_highlights_in_postgres(
-        self, search_query: str, events: List[EventBase], tsquery_func: str
+        self, search_query: str, events: List[EventBase]
     ) -> Set[str]:
         """Given a list of events and a search term, return a list of words
         that match from the content of the event.
@@ -715,7 +697,6 @@ async def _find_highlights_in_postgres(
         Args:
             search_query
             events: A list of events
-            tsquery_func: The tsquery_* function to use when making queries
 
         Returns:
             A set of strings.
@@ -748,7 +729,7 @@ def f(txn: LoggingTransaction) -> Set[str]:
                 while stop_sel in value:
                     stop_sel += ">"
 
-                query = f"SELECT ts_headline(?, {tsquery_func}('english', ?), %s)" % (
+                query = "SELECT ts_headline(?, to_tsquery('english', ?), %s)" % (
                     _to_postgres_options(
                         {
                             "StartSel": start_sel,
@@ -779,128 +760,20 @@ def _to_postgres_options(options_dict: JsonDict) -> str:
     return "'%s'" % (",".join("%s=%s" % (k, v) for k, v in options_dict.items()),)
 
 
-@dataclass
-class Phrase:
-    phrase: List[str]
-
-
-class SearchToken(enum.Enum):
-    Not = enum.auto()
-    Or = enum.auto()
-    And = enum.auto()
-
-
-Token = Union[str, Phrase, SearchToken]
-TokenList = List[Token]
-
-
-def _is_stop_word(word: str) -> bool:
-    # TODO Pull these out of the dictionary:
-    #  https://github.com/postgres/postgres/blob/master/src/backend/snowball/stopwords/english.stop
-    return word in {"the", "a", "you", "me", "and", "but"}
-
-
-def _tokenize_query(query: str) -> TokenList:
-    """
-    Convert the user-supplied `query` into a TokenList, which can be translated into
-    some DB-specific syntax.
-
-    The following constructs are supported:
-
-    - phrase queries using "double quotes"
-    - case-insensitive `or` and `and` operators
-    - negation of a keyword via unary `-`
-    - unary hyphen to denote NOT e.g. 'include -exclude'
-
-    The following differs from websearch_to_tsquery:
-
-    - Stop words are not removed.
-    - Unclosed phrases are treated differently.
-
-    """
-    tokens: TokenList = []
-
-    # Find phrases.
-    in_phrase = False
-    parts = deque(query.split('"'))
-    for i, part in enumerate(parts):
-        # The contents inside double quotes is treated as a phrase, a trailing
-        # double quote is not implied.
-        in_phrase = bool(i % 2) and i != (len(parts) - 1)
-
-        # Pull out the individual words, discarding any non-word characters.
-        words = deque(re.findall(r"([\w\-]+)", part, re.UNICODE))
-
-        # Phrases have simplified handling of words.
-        if in_phrase:
-            # Skip stop words.
-            phrase = [word for word in words if not _is_stop_word(word)]
-
-            # Consecutive words are implicitly ANDed together.
-            if tokens and tokens[-1] not in (SearchToken.Not, SearchToken.Or):
-                tokens.append(SearchToken.And)
-
-            # Add the phrase.
-            tokens.append(Phrase(phrase))
-            continue
-
-        # Otherwise, not in a phrase.
-        while words:
-            word = words.popleft()
-
-            if word.startswith("-"):
-                tokens.append(SearchToken.Not)
-
-                # If there's more word, put it back to be processed again.
-                word = word[1:]
-                if word:
-                    words.appendleft(word)
-            elif word.lower() == "or":
-                tokens.append(SearchToken.Or)
-            else:
-                # Skip stop words.
-                if _is_stop_word(word):
-                    continue
-
-                # Consecutive words are implicitly ANDed together.
-                if tokens and tokens[-1] not in (SearchToken.Not, SearchToken.Or):
-                    tokens.append(SearchToken.And)
-
-                # Add the search term.
-                tokens.append(word)
-
-    return tokens
-
-
-def _tokens_to_sqlite_match_query(tokens: TokenList) -> str:
-    """
-    Convert the list of tokens to a string suitable for passing to sqlite's MATCH.
-    Assume sqlite was compiled with enhanced query syntax.
-
-    Ref: https://www.sqlite.org/fts3.html#full_text_index_queries
+def _parse_query(database_engine: BaseDatabaseEngine, search_term: str) -> str:
+    """Takes a plain unicode string from the user and converts it into a form
+    that can be passed to database.
+    We use this so that we can add prefix matching, which isn't something
+    that is supported by default.
     """
-    match_query = []
-    for token in tokens:
-        if isinstance(token, str):
-            match_query.append(token)
-        elif isinstance(token, Phrase):
-            match_query.append('"' + " ".join(token.phrase) + '"')
-        elif token == SearchToken.Not:
-            # TODO: SQLite treats NOT as a *binary* operator. Hopefully a search
-            # term has already been added before this.
-            match_query.append(" NOT ")
-        elif token == SearchToken.Or:
-            match_query.append(" OR ")
-        elif token == SearchToken.And:
-            match_query.append(" AND ")
-        else:
-            raise ValueError(f"unknown token {token}")
-
-    return "".join(match_query)
 
+    # Pull out the individual words, discarding any non-word characters.
+    results = re.findall(r"([\w\-]+)", search_term, re.UNICODE)
 
-def _parse_query_for_sqlite(search_term: str) -> str:
-    """Takes a plain unicode string from the user and converts it into a form
-    that can be passed to sqllite's matchinfo().
-    """
-    return _tokens_to_sqlite_match_query(_tokenize_query(search_term))
+    if isinstance(database_engine, PostgresEngine):
+        return " & ".join(result + ":*" for result in results)
+    elif isinstance(database_engine, Sqlite3Engine):
+        return " & ".join(result + "*" for result in results)
+    else:
+        # This should be unreachable.
+        raise Exception("Unrecognized database engine")
diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py
index 9bf74bbf592..d8c0f64d9a4 100644
--- a/synapse/storage/engines/postgres.py
+++ b/synapse/storage/engines/postgres.py
@@ -170,22 +170,6 @@ def supports_returning(self) -> bool:
         """Do we support the `RETURNING` clause in insert/update/delete?"""
         return True
 
-    @property
-    def tsquery_func(self) -> str:
-        """
-        Selects a tsquery_* func to use.
-
-        Ref: https://www.postgresql.org/docs/current/textsearch-controls.html
-
-        Returns:
-            The function name.
-        """
-        # Postgres 11 added support for websearch_to_tsquery.
-        assert self._version is not None
-        if self._version >= 110000:
-            return "websearch_to_tsquery"
-        return "plainto_tsquery"
-
     def is_deadlock(self, error: Exception) -> bool:
         if isinstance(error, psycopg2.DatabaseError):
             # https://www.postgresql.org/docs/current/static/errcodes-appendix.html
diff --git a/synapse/storage/schema/main/delta/73/10_update_sqlite_fts4_tokenizer.py b/synapse/storage/schema/main/delta/73/10_update_sqlite_fts4_tokenizer.py
deleted file mode 100644
index 3de0a709eba..00000000000
--- a/synapse/storage/schema/main/delta/73/10_update_sqlite_fts4_tokenizer.py
+++ /dev/null
@@ -1,62 +0,0 @@
-# Copyright 2022 The Matrix.org Foundation C.I.C.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import json
-
-from synapse.storage.engines import BaseDatabaseEngine, Sqlite3Engine
-from synapse.storage.types import Cursor
-
-
-def run_create(cur: Cursor, database_engine: BaseDatabaseEngine) -> None:
-    """
-    Upgrade the event_search table to use the porter tokenizer if it isn't already
-
-    Applies only for sqlite.
-    """
-    if not isinstance(database_engine, Sqlite3Engine):
-        return
-
-    # Rebuild the table event_search table with tokenize=porter configured.
-    cur.execute("DROP TABLE event_search")
-    cur.execute(
-        """
-        CREATE VIRTUAL TABLE event_search
-        USING fts4 (tokenize=porter, event_id, room_id, sender, key, value )
-        """
-    )
-
-    # Re-run the background job to re-populate the event_search table.
-    cur.execute("SELECT MIN(stream_ordering) FROM events")
-    row = cur.fetchone()
-    min_stream_id = row[0]
-
-    # If there are not any events, nothing to do.
-    if min_stream_id is None:
-        return
-
-    cur.execute("SELECT MAX(stream_ordering) FROM events")
-    row = cur.fetchone()
-    max_stream_id = row[0]
-
-    progress = {
-        "target_min_stream_id_inclusive": min_stream_id,
-        "max_stream_id_exclusive": max_stream_id + 1,
-    }
-    progress_json = json.dumps(progress)
-
-    sql = """
-    INSERT into background_updates (ordering, update_name, progress_json)
-    VALUES (?, ?, ?)
-    """
-
-    cur.execute(sql, (7310, "event_search", progress_json))
diff --git a/tests/storage/test_room_search.py b/tests/storage/test_room_search.py
index 9ddc19900af..e747c6b50eb 100644
--- a/tests/storage/test_room_search.py
+++ b/tests/storage/test_room_search.py
@@ -12,22 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import List, Tuple, Union
-from unittest.case import SkipTest
-from unittest.mock import PropertyMock, patch
-
-from twisted.test.proto_helpers import MemoryReactor
-
 import synapse.rest.admin
 from synapse.api.constants import EventTypes
 from synapse.api.errors import StoreError
 from synapse.rest.client import login, room
-from synapse.server import HomeServer
-from synapse.storage.databases.main import DataStore
-from synapse.storage.databases.main.search import Phrase, SearchToken, _tokenize_query
 from synapse.storage.engines import PostgresEngine
-from synapse.storage.engines.sqlite import Sqlite3Engine
-from synapse.util import Clock
 
 from tests.unittest import HomeserverTestCase, skip_unless
 from tests.utils import USE_POSTGRES_FOR_TESTS
@@ -198,205 +187,3 @@ def test_sqlite_non_string_deletion_background_update(self):
             ),
         )
         self.assertCountEqual(values, ["hi", "2"])
-
-
-class MessageSearchTest(HomeserverTestCase):
-    """
-    Check message search.
-
-    A powerful way to check the behaviour is to run the following in Postgres >= 11:
-
-        # SELECT websearch_to_tsquery('english', <your string>);
-
-    The result can be compared to the tokenized version for SQLite and Postgres < 11.
-
-    """
-
-    servlets = [
-        synapse.rest.admin.register_servlets_for_client_rest_resource,
-        login.register_servlets,
-        room.register_servlets,
-    ]
-
-    PHRASE = "the quick brown fox jumps over the lazy dog"
-
-    # Each entry is a search query, followed by either a boolean of whether it is
-    # in the phrase OR a tuple of booleans: whether it matches using websearch
-    # and using plain search.
-    COMMON_CASES: List[Tuple[str, Union[bool, Tuple[bool, bool]]]] = [
-        ("nope", False),
-        ("brown", True),
-        ("quick brown", True),
-        ("brown quick", True),
-        ("quick \t brown", True),
-        ("jump", True),
-        ("brown nope", False),
-        ('"brown quick"', (False, True)),
-        ('"jumps over"', True),
-        ('"quick fox"', (False, True)),
-        ("nope OR doublenope", False),
-        ("furphy OR fox", (True, False)),
-        ("fox -nope", (True, False)),
-        ("fox -brown", (False, True)),
-        ('"fox" quick', True),
-        ('"fox quick', True),
-        ('"quick brown', True),
-        ('" quick "', True),
-        ('" nope"', False),
-    ]
-    # TODO Test non-ASCII cases.
-
-    # Cases that fail on SQLite.
-    POSTGRES_CASES: List[Tuple[str, Union[bool, Tuple[bool, bool]]]] = [
-        # SQLite treats NOT as a binary operator.
-        ("- fox", (False, True)),
-        ("- nope", (True, False)),
-        ('"-fox quick', (False, True)),
-        # PostgreSQL skips stop words.
-        ('"the quick brown"', True),
-        ('"over lazy"', True),
-    ]
-
-    def prepare(
-        self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer
-    ) -> None:
-        # Register a user and create a room, create some messages
-        self.register_user("alice", "password")
-        self.access_token = self.login("alice", "password")
-        self.room_id = self.helper.create_room_as("alice", tok=self.access_token)
-
-        # Send the phrase as a message and check it was created
-        response = self.helper.send(self.room_id, self.PHRASE, tok=self.access_token)
-        self.assertIn("event_id", response)
-
-    def test_tokenize_query(self) -> None:
-        """Test the custom logic to tokenize a user's query."""
-        cases = (
-            ("brown", ["brown"]),
-            ("quick brown", ["quick", SearchToken.And, "brown"]),
-            ("quick \t brown", ["quick", SearchToken.And, "brown"]),
-            ('"brown quick"', [Phrase(["brown", "quick"])]),
-            ("furphy OR fox", ["furphy", SearchToken.Or, "fox"]),
-            ("fox -brown", ["fox", SearchToken.Not, "brown"]),
-            ("- fox", [SearchToken.Not, "fox"]),
-            ('"fox" quick', [Phrase(["fox"]), SearchToken.And, "quick"]),
-            # No trailing double quoe.
-            ('"fox quick', ["fox", SearchToken.And, "quick"]),
-            ('"-fox quick', [SearchToken.Not, "fox", SearchToken.And, "quick"]),
-            ('" quick "', [Phrase(["quick"])]),
-            (
-                'q"uick brow"n',
-                [
-                    "q",
-                    SearchToken.And,
-                    Phrase(["uick", "brow"]),
-                    SearchToken.And,
-                    "n",
-                ],
-            ),
-            (
-                '-"quick brown"',
-                [SearchToken.Not, Phrase(["quick", "brown"])],
-            ),
-        )
-
-        for query, expected in cases:
-            tokenized = _tokenize_query(query)
-            self.assertEqual(
-                tokenized, expected, f"{tokenized} != {expected} for {query}"
-            )
-
-    def _check_test_cases(
-        self,
-        store: DataStore,
-        cases: List[Tuple[str, Union[bool, Tuple[bool, bool]]]],
-        index=0,
-    ) -> None:
-        # Run all the test cases versus search_msgs
-        for query, expect_to_contain in cases:
-            if isinstance(expect_to_contain, tuple):
-                expect_to_contain = expect_to_contain[index]
-
-            result = self.get_success(
-                store.search_msgs([self.room_id], query, ["content.body"])
-            )
-            self.assertEquals(
-                result["count"],
-                1 if expect_to_contain else 0,
-                f"expected '{query}' to match '{self.PHRASE}'"
-                if expect_to_contain
-                else f"'{query}' unexpectedly matched '{self.PHRASE}'",
-            )
-            self.assertEquals(
-                len(result["results"]),
-                1 if expect_to_contain else 0,
-                "results array length should match count",
-            )
-
-        # Run them again versus search_rooms
-        for query, expect_to_contain in cases:
-            if isinstance(expect_to_contain, tuple):
-                expect_to_contain = expect_to_contain[index]
-
-            result = self.get_success(
-                store.search_rooms([self.room_id], query, ["content.body"], 10)
-            )
-            self.assertEquals(
-                result["count"],
-                1 if expect_to_contain else 0,
-                f"expected '{query}' to match '{self.PHRASE}'"
-                if expect_to_contain
-                else f"'{query}' unexpectedly matched '{self.PHRASE}'",
-            )
-            self.assertEquals(
-                len(result["results"]),
-                1 if expect_to_contain else 0,
-                "results array length should match count",
-            )
-
-    def test_postgres_web_search_for_phrase(self):
-        """
-        Test searching for phrases using typical web search syntax, as per postgres' websearch_to_tsquery.
-        This test is skipped unless the postgres instance supports websearch_to_tsquery.
-        """
-
-        store = self.hs.get_datastores().main
-        if not isinstance(store.database_engine, PostgresEngine):
-            raise SkipTest("Test only applies when postgres is used as the database")
-
-        if store.database_engine.tsquery_func != "websearch_to_tsquery":
-            raise SkipTest(
-                "Test only applies when postgres supporting websearch_to_tsquery is used as the database"
-            )
-
-        self._check_test_cases(store, self.COMMON_CASES + self.POSTGRES_CASES, index=0)
-
-    def test_postgres_non_web_search_for_phrase(self):
-        """
-        Test postgres searching for phrases without using web search, which is used when websearch_to_tsquery isn't
-        supported by the current postgres version.
-        """
-
-        store = self.hs.get_datastores().main
-        if not isinstance(store.database_engine, PostgresEngine):
-            raise SkipTest("Test only applies when postgres is used as the database")
-
-        # Patch tsquery_func to always return plainto_tsquery to ensure we're testing that code path.
-        with patch(
-            "synapse.storage.engines.postgres.PostgresEngine.tsquery_func",
-            new_callable=PropertyMock,
-        ) as supports_websearch_to_tsquery:
-            supports_websearch_to_tsquery.return_value = "plainto_tsquery"
-            self._check_test_cases(
-                store, self.COMMON_CASES + self.POSTGRES_CASES, index=1
-            )
-
-    def test_sqlite_search(self):
-        """
-        Test sqlite searching for phrases.
-        """
-        store = self.hs.get_datastores().main
-        if not isinstance(store.database_engine, Sqlite3Engine):
-            raise SkipTest("Test only applies when sqlite is used as the database")
-
-        self._check_test_cases(store, self.COMMON_CASES, index=0)

From 24409c7c158c108fa31433dd20ee33d423f5cb04 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 14 Feb 2018 13:47:14 +0000
Subject: [PATCH 174/278] Disable auto search for prefixes in event search

---
 synapse/storage/databases/main/search.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py
index 1b79acf9555..ddcda6f1a74 100644
--- a/synapse/storage/databases/main/search.py
+++ b/synapse/storage/databases/main/search.py
@@ -771,7 +771,7 @@ def _parse_query(database_engine: BaseDatabaseEngine, search_term: str) -> str:
     results = re.findall(r"([\w\-]+)", search_term, re.UNICODE)
 
     if isinstance(database_engine, PostgresEngine):
-        return " & ".join(result + ":*" for result in results)
+        return " & ".join(result for result in results)
     elif isinstance(database_engine, Sqlite3Engine):
         return " & ".join(result + "*" for result in results)
     else:

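The change above only drops the ":*" suffix that _parse_query appends for each
token on PostgreSQL, so whole-word matching keeps working but prefix matching is
disabled. A self-contained sketch of the before/after behaviour, using nothing
beyond the regex shown in the hunk (the function name and the prefix_matching
flag are illustrative, not part of the real code):

    import re

    def _parse_query_postgres(search_term: str, prefix_matching: bool) -> str:
        # Split the user's query into word tokens and AND them together for
        # to_tsquery(); with prefix_matching each token also carries the ":*"
        # prefix-match operator, which is what this patch removes.
        results = re.findall(r"([\w\-]+)", search_term, re.UNICODE)
        if prefix_matching:
            return " & ".join(result + ":*" for result in results)
        return " & ".join(results)

    # Before the patch: "quick bro" -> "quick:* & bro:*"  (still matches "brown")
    # After the patch:  "quick bro" -> "quick & bro"      (no longer matches)
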
From f8f01b869e20df9628b90e0343b0c2ea9c660933 Mon Sep 17 00:00:00 2001
From: Sean Quah <seanq@matrix.org>
Date: Fri, 4 Nov 2022 11:38:23 +0000
Subject: [PATCH 175/278] Revert "Disable auto search for prefixes in event
 search"

This reverts commit 24409c7c158c108fa31433dd20ee33d423f5cb04.
---
 synapse/storage/databases/main/search.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py
index ddcda6f1a74..1b79acf9555 100644
--- a/synapse/storage/databases/main/search.py
+++ b/synapse/storage/databases/main/search.py
@@ -771,7 +771,7 @@ def _parse_query(database_engine: BaseDatabaseEngine, search_term: str) -> str:
     results = re.findall(r"([\w\-]+)", search_term, re.UNICODE)
 
     if isinstance(database_engine, PostgresEngine):
-        return " & ".join(result for result in results)
+        return " & ".join(result + ":*" for result in results)
     elif isinstance(database_engine, Sqlite3Engine):
         return " & ".join(result + "*" for result in results)
     else:

From 3b44a7c9d1beebc1dff41ab4dfe63ba7d107db45 Mon Sep 17 00:00:00 2001
From: Sean Quah <seanq@matrix.org>
Date: Fri, 4 Nov 2022 11:38:31 +0000
Subject: [PATCH 176/278] Revert "Revert "Unified search query syntax using the
 full-text search capabilities of the underlying DB. (#11635)""

This reverts commit 7e0dd52782806592df61a1a517660b694515dc27.
---
 synapse/storage/databases/main/search.py      | 197 +++++++++++++---
 synapse/storage/engines/postgres.py           |  16 ++
 .../73/10_update_sqlite_fts4_tokenizer.py     |  62 +++++
 tests/storage/test_room_search.py             | 213 ++++++++++++++++++
 4 files changed, 453 insertions(+), 35 deletions(-)
 create mode 100644 synapse/storage/schema/main/delta/73/10_update_sqlite_fts4_tokenizer.py

diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py
index 1b79acf9555..a89fc54c2cb 100644
--- a/synapse/storage/databases/main/search.py
+++ b/synapse/storage/databases/main/search.py
@@ -11,10 +11,22 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+import enum
 import logging
 import re
-from typing import TYPE_CHECKING, Any, Collection, Iterable, List, Optional, Set, Tuple
+from collections import deque
+from dataclasses import dataclass
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Collection,
+    Iterable,
+    List,
+    Optional,
+    Set,
+    Tuple,
+    Union,
+)
 
 import attr
 
@@ -27,7 +39,7 @@
     LoggingTransaction,
 )
 from synapse.storage.databases.main.events_worker import EventRedactBehaviour
-from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine
+from synapse.storage.engines import PostgresEngine, Sqlite3Engine
 from synapse.types import JsonDict
 
 if TYPE_CHECKING:
@@ -421,8 +433,6 @@ async def search_msgs(
         """
         clauses = []
 
-        search_query = _parse_query(self.database_engine, search_term)
-
         args: List[Any] = []
 
         # Make sure we don't explode because the person is in too many rooms.
@@ -444,20 +454,24 @@ async def search_msgs(
         count_clauses = clauses
 
         if isinstance(self.database_engine, PostgresEngine):
+            search_query = search_term
+            tsquery_func = self.database_engine.tsquery_func
             sql = (
-                "SELECT ts_rank_cd(vector, to_tsquery('english', ?)) AS rank,"
+                f"SELECT ts_rank_cd(vector, {tsquery_func}('english', ?)) AS rank,"
                 " room_id, event_id"
                 " FROM event_search"
-                " WHERE vector @@ to_tsquery('english', ?)"
+                f" WHERE vector @@  {tsquery_func}('english', ?)"
             )
             args = [search_query, search_query] + args
 
             count_sql = (
                 "SELECT room_id, count(*) as count FROM event_search"
-                " WHERE vector @@ to_tsquery('english', ?)"
+                f" WHERE vector @@ {tsquery_func}('english', ?)"
             )
             count_args = [search_query] + count_args
         elif isinstance(self.database_engine, Sqlite3Engine):
+            search_query = _parse_query_for_sqlite(search_term)
+
             sql = (
                 "SELECT rank(matchinfo(event_search)) as rank, room_id, event_id"
                 " FROM event_search"
@@ -469,7 +483,7 @@ async def search_msgs(
                 "SELECT room_id, count(*) as count FROM event_search"
                 " WHERE value MATCH ?"
             )
-            count_args = [search_term] + count_args
+            count_args = [search_query] + count_args
         else:
             # This should be unreachable.
             raise Exception("Unrecognized database engine")
@@ -501,7 +515,9 @@ async def search_msgs(
 
         highlights = None
         if isinstance(self.database_engine, PostgresEngine):
-            highlights = await self._find_highlights_in_postgres(search_query, events)
+            highlights = await self._find_highlights_in_postgres(
+                search_query, events, tsquery_func
+            )
 
         count_sql += " GROUP BY room_id"
 
@@ -510,7 +526,6 @@ async def search_msgs(
         )
 
         count = sum(row["count"] for row in count_results if row["room_id"] in room_ids)
-
         return {
             "results": [
                 {"event": event_map[r["event_id"]], "rank": r["rank"]}
@@ -542,9 +557,6 @@ async def search_rooms(
             Each match as a dictionary.
         """
         clauses = []
-
-        search_query = _parse_query(self.database_engine, search_term)
-
         args: List[Any] = []
 
         # Make sure we don't explode because the person is in too many rooms.
@@ -582,20 +594,23 @@ async def search_rooms(
             args.extend([origin_server_ts, origin_server_ts, stream])
 
         if isinstance(self.database_engine, PostgresEngine):
+            search_query = search_term
+            tsquery_func = self.database_engine.tsquery_func
             sql = (
-                "SELECT ts_rank_cd(vector, to_tsquery('english', ?)) as rank,"
+                f"SELECT ts_rank_cd(vector, {tsquery_func}('english', ?)) as rank,"
                 " origin_server_ts, stream_ordering, room_id, event_id"
                 " FROM event_search"
-                " WHERE vector @@ to_tsquery('english', ?) AND "
+                f" WHERE vector @@ {tsquery_func}('english', ?) AND "
             )
             args = [search_query, search_query] + args
 
             count_sql = (
                 "SELECT room_id, count(*) as count FROM event_search"
-                " WHERE vector @@ to_tsquery('english', ?) AND "
+                f" WHERE vector @@ {tsquery_func}('english', ?) AND "
             )
             count_args = [search_query] + count_args
         elif isinstance(self.database_engine, Sqlite3Engine):
+
             # We use CROSS JOIN here to ensure we use the right indexes.
             # https://sqlite.org/optoverview.html#crossjoin
             #
@@ -614,13 +629,14 @@ async def search_rooms(
                 " CROSS JOIN events USING (event_id)"
                 " WHERE "
             )
+            search_query = _parse_query_for_sqlite(search_term)
             args = [search_query] + args
 
             count_sql = (
                 "SELECT room_id, count(*) as count FROM event_search"
                 " WHERE value MATCH ? AND "
             )
-            count_args = [search_term] + count_args
+            count_args = [search_query] + count_args
         else:
             # This should be unreachable.
             raise Exception("Unrecognized database engine")
@@ -660,7 +676,9 @@ async def search_rooms(
 
         highlights = None
         if isinstance(self.database_engine, PostgresEngine):
-            highlights = await self._find_highlights_in_postgres(search_query, events)
+            highlights = await self._find_highlights_in_postgres(
+                search_query, events, tsquery_func
+            )
 
         count_sql += " GROUP BY room_id"
 
@@ -686,7 +704,7 @@ async def search_rooms(
         }
 
     async def _find_highlights_in_postgres(
-        self, search_query: str, events: List[EventBase]
+        self, search_query: str, events: List[EventBase], tsquery_func: str
     ) -> Set[str]:
         """Given a list of events and a search term, return a list of words
         that match from the content of the event.
@@ -697,6 +715,7 @@ async def _find_highlights_in_postgres(
         Args:
             search_query
             events: A list of events
+            tsquery_func: The tsquery_* function to use when making queries
 
         Returns:
             A set of strings.
@@ -729,7 +748,7 @@ def f(txn: LoggingTransaction) -> Set[str]:
                 while stop_sel in value:
                     stop_sel += ">"
 
-                query = "SELECT ts_headline(?, to_tsquery('english', ?), %s)" % (
+                query = f"SELECT ts_headline(?, {tsquery_func}('english', ?), %s)" % (
                     _to_postgres_options(
                         {
                             "StartSel": start_sel,
@@ -760,20 +779,128 @@ def _to_postgres_options(options_dict: JsonDict) -> str:
     return "'%s'" % (",".join("%s=%s" % (k, v) for k, v in options_dict.items()),)
 
 
-def _parse_query(database_engine: BaseDatabaseEngine, search_term: str) -> str:
-    """Takes a plain unicode string from the user and converts it into a form
-    that can be passed to database.
-    We use this so that we can add prefix matching, which isn't something
-    that is supported by default.
+@dataclass
+class Phrase:
+    phrase: List[str]
+
+
+class SearchToken(enum.Enum):
+    Not = enum.auto()
+    Or = enum.auto()
+    And = enum.auto()
+
+
+Token = Union[str, Phrase, SearchToken]
+TokenList = List[Token]
+
+
+def _is_stop_word(word: str) -> bool:
+    # TODO Pull these out of the dictionary:
+    #  https://github.com/postgres/postgres/blob/master/src/backend/snowball/stopwords/english.stop
+    return word in {"the", "a", "you", "me", "and", "but"}
+
+
+def _tokenize_query(query: str) -> TokenList:
+    """
+    Convert the user-supplied `query` into a TokenList, which can be translated into
+    some DB-specific syntax.
+
+    The following constructs are supported:
+
+    - phrase queries using "double quotes"
+    - case-insensitive `or` and `and` operators
+    - negation of a keyword via unary `-`
+    - unary hyphen to denote NOT e.g. 'include -exclude'
+
+    The following differs from websearch_to_tsquery:
+
+    - Only a small hard-coded list of stop words is removed.
+    - Unclosed phrases are treated differently.
+
+    """
+    tokens: TokenList = []
+
+    # Find phrases.
+    in_phrase = False
+    parts = deque(query.split('"'))
+    for i, part in enumerate(parts):
+        # The contents inside double quotes are treated as a phrase; a trailing
+        # double quote is not implied.
+        in_phrase = bool(i % 2) and i != (len(parts) - 1)
+
+        # Pull out the individual words, discarding any non-word characters.
+        words = deque(re.findall(r"([\w\-]+)", part, re.UNICODE))
+
+        # Phrases have simplified handling of words.
+        if in_phrase:
+            # Skip stop words.
+            phrase = [word for word in words if not _is_stop_word(word)]
+
+            # Consecutive words are implicitly ANDed together.
+            if tokens and tokens[-1] not in (SearchToken.Not, SearchToken.Or):
+                tokens.append(SearchToken.And)
+
+            # Add the phrase.
+            tokens.append(Phrase(phrase))
+            continue
+
+        # Otherwise, not in a phrase.
+        while words:
+            word = words.popleft()
+
+            if word.startswith("-"):
+                tokens.append(SearchToken.Not)
+
+                # If there's more of the word left, put it back to be processed again.
+                word = word[1:]
+                if word:
+                    words.appendleft(word)
+            elif word.lower() == "or":
+                tokens.append(SearchToken.Or)
+            else:
+                # Skip stop words.
+                if _is_stop_word(word):
+                    continue
+
+                # Consecutive words are implicitly ANDed together.
+                if tokens and tokens[-1] not in (SearchToken.Not, SearchToken.Or):
+                    tokens.append(SearchToken.And)
+
+                # Add the search term.
+                tokens.append(word)
+
+    return tokens
+
+
+def _tokens_to_sqlite_match_query(tokens: TokenList) -> str:
+    """
+    Convert the list of tokens to a string suitable for passing to sqlite's MATCH.
+    Assume sqlite was compiled with enhanced query syntax.
+
+    Ref: https://www.sqlite.org/fts3.html#full_text_index_queries
     """
+    match_query = []
+    for token in tokens:
+        if isinstance(token, str):
+            match_query.append(token)
+        elif isinstance(token, Phrase):
+            match_query.append('"' + " ".join(token.phrase) + '"')
+        elif token == SearchToken.Not:
+            # TODO: SQLite treats NOT as a *binary* operator. Hopefully a search
+            # term has already been added before this.
+            match_query.append(" NOT ")
+        elif token == SearchToken.Or:
+            match_query.append(" OR ")
+        elif token == SearchToken.And:
+            match_query.append(" AND ")
+        else:
+            raise ValueError(f"unknown token {token}")
+
+    return "".join(match_query)
 
-    # Pull out the individual words, discarding any non-word characters.
-    results = re.findall(r"([\w\-]+)", search_term, re.UNICODE)
 
-    if isinstance(database_engine, PostgresEngine):
-        return " & ".join(result + ":*" for result in results)
-    elif isinstance(database_engine, Sqlite3Engine):
-        return " & ".join(result + "*" for result in results)
-    else:
-        # This should be unreachable.
-        raise Exception("Unrecognized database engine")
+def _parse_query_for_sqlite(search_term: str) -> str:
+    """Takes a plain unicode string from the user and converts it into a form
+    that can be passed to sqlite's matchinfo().
+    """
+    return _tokens_to_sqlite_match_query(_tokenize_query(search_term))
diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py
index d8c0f64d9a4..9bf74bbf592 100644
--- a/synapse/storage/engines/postgres.py
+++ b/synapse/storage/engines/postgres.py
@@ -170,6 +170,22 @@ def supports_returning(self) -> bool:
         """Do we support the `RETURNING` clause in insert/update/delete?"""
         return True
 
+    @property
+    def tsquery_func(self) -> str:
+        """
+        Selects a tsquery_* func to use.
+
+        Ref: https://www.postgresql.org/docs/current/textsearch-controls.html
+
+        Returns:
+            The function name.
+        """
+        # Postgres 11 added support for websearch_to_tsquery.
+        assert self._version is not None
+        if self._version >= 110000:
+            return "websearch_to_tsquery"
+        return "plainto_tsquery"
+
     def is_deadlock(self, error: Exception) -> bool:
         if isinstance(error, psycopg2.DatabaseError):
             # https://www.postgresql.org/docs/current/static/errcodes-appendix.html
diff --git a/synapse/storage/schema/main/delta/73/10_update_sqlite_fts4_tokenizer.py b/synapse/storage/schema/main/delta/73/10_update_sqlite_fts4_tokenizer.py
new file mode 100644
index 00000000000..3de0a709eba
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/10_update_sqlite_fts4_tokenizer.py
@@ -0,0 +1,62 @@
+# Copyright 2022 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import json
+
+from synapse.storage.engines import BaseDatabaseEngine, Sqlite3Engine
+from synapse.storage.types import Cursor
+
+
+def run_create(cur: Cursor, database_engine: BaseDatabaseEngine) -> None:
+    """
+    Upgrade the event_search table to use the porter tokenizer if it isn't already
+
+    Applies only for sqlite.
+    """
+    if not isinstance(database_engine, Sqlite3Engine):
+        return
+
+    # Rebuild the event_search table with tokenize=porter configured.
+    cur.execute("DROP TABLE event_search")
+    cur.execute(
+        """
+        CREATE VIRTUAL TABLE event_search
+        USING fts4 (tokenize=porter, event_id, room_id, sender, key, value )
+        """
+    )
+
+    # Re-run the background job to re-populate the event_search table.
+    cur.execute("SELECT MIN(stream_ordering) FROM events")
+    row = cur.fetchone()
+    min_stream_id = row[0]
+
+    # If there are not any events, nothing to do.
+    if min_stream_id is None:
+        return
+
+    cur.execute("SELECT MAX(stream_ordering) FROM events")
+    row = cur.fetchone()
+    max_stream_id = row[0]
+
+    progress = {
+        "target_min_stream_id_inclusive": min_stream_id,
+        "max_stream_id_exclusive": max_stream_id + 1,
+    }
+    progress_json = json.dumps(progress)
+
+    sql = """
+    INSERT into background_updates (ordering, update_name, progress_json)
+    VALUES (?, ?, ?)
+    """
+
+    cur.execute(sql, (7310, "event_search", progress_json))
diff --git a/tests/storage/test_room_search.py b/tests/storage/test_room_search.py
index e747c6b50eb..9ddc19900af 100644
--- a/tests/storage/test_room_search.py
+++ b/tests/storage/test_room_search.py
@@ -12,11 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import List, Tuple, Union
+from unittest.case import SkipTest
+from unittest.mock import PropertyMock, patch
+
+from twisted.test.proto_helpers import MemoryReactor
+
 import synapse.rest.admin
 from synapse.api.constants import EventTypes
 from synapse.api.errors import StoreError
 from synapse.rest.client import login, room
+from synapse.server import HomeServer
+from synapse.storage.databases.main import DataStore
+from synapse.storage.databases.main.search import Phrase, SearchToken, _tokenize_query
 from synapse.storage.engines import PostgresEngine
+from synapse.storage.engines.sqlite import Sqlite3Engine
+from synapse.util import Clock
 
 from tests.unittest import HomeserverTestCase, skip_unless
 from tests.utils import USE_POSTGRES_FOR_TESTS
@@ -187,3 +198,205 @@ def test_sqlite_non_string_deletion_background_update(self):
             ),
         )
         self.assertCountEqual(values, ["hi", "2"])
+
+
+class MessageSearchTest(HomeserverTestCase):
+    """
+    Check message search.
+
+    A powerful way to check the behaviour is to run the following in Postgres >= 11:
+
+        # SELECT websearch_to_tsquery('english', <your string>);
+
+    The result can be compared to the tokenized version for SQLite and Postgres < 11.
+
+    """
+
+    servlets = [
+        synapse.rest.admin.register_servlets_for_client_rest_resource,
+        login.register_servlets,
+        room.register_servlets,
+    ]
+
+    PHRASE = "the quick brown fox jumps over the lazy dog"
+
+    # Each entry is a search query, followed by either a boolean of whether it is
+    # in the phrase OR a tuple of booleans: whether it matches using websearch
+    # and using plain search.
+    COMMON_CASES: List[Tuple[str, Union[bool, Tuple[bool, bool]]]] = [
+        ("nope", False),
+        ("brown", True),
+        ("quick brown", True),
+        ("brown quick", True),
+        ("quick \t brown", True),
+        ("jump", True),
+        ("brown nope", False),
+        ('"brown quick"', (False, True)),
+        ('"jumps over"', True),
+        ('"quick fox"', (False, True)),
+        ("nope OR doublenope", False),
+        ("furphy OR fox", (True, False)),
+        ("fox -nope", (True, False)),
+        ("fox -brown", (False, True)),
+        ('"fox" quick', True),
+        ('"fox quick', True),
+        ('"quick brown', True),
+        ('" quick "', True),
+        ('" nope"', False),
+    ]
+    # TODO Test non-ASCII cases.
+
+    # Cases that fail on SQLite.
+    POSTGRES_CASES: List[Tuple[str, Union[bool, Tuple[bool, bool]]]] = [
+        # SQLite treats NOT as a binary operator.
+        ("- fox", (False, True)),
+        ("- nope", (True, False)),
+        ('"-fox quick', (False, True)),
+        # PostgreSQL skips stop words.
+        ('"the quick brown"', True),
+        ('"over lazy"', True),
+    ]
+
+    def prepare(
+        self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer
+    ) -> None:
+        # Register a user and create a room, create some messages
+        self.register_user("alice", "password")
+        self.access_token = self.login("alice", "password")
+        self.room_id = self.helper.create_room_as("alice", tok=self.access_token)
+
+        # Send the phrase as a message and check it was created
+        response = self.helper.send(self.room_id, self.PHRASE, tok=self.access_token)
+        self.assertIn("event_id", response)
+
+    def test_tokenize_query(self) -> None:
+        """Test the custom logic to tokenize a user's query."""
+        cases = (
+            ("brown", ["brown"]),
+            ("quick brown", ["quick", SearchToken.And, "brown"]),
+            ("quick \t brown", ["quick", SearchToken.And, "brown"]),
+            ('"brown quick"', [Phrase(["brown", "quick"])]),
+            ("furphy OR fox", ["furphy", SearchToken.Or, "fox"]),
+            ("fox -brown", ["fox", SearchToken.Not, "brown"]),
+            ("- fox", [SearchToken.Not, "fox"]),
+            ('"fox" quick', [Phrase(["fox"]), SearchToken.And, "quick"]),
+            # No trailing double quoe.
+            ('"fox quick', ["fox", SearchToken.And, "quick"]),
+            ('"-fox quick', [SearchToken.Not, "fox", SearchToken.And, "quick"]),
+            ('" quick "', [Phrase(["quick"])]),
+            (
+                'q"uick brow"n',
+                [
+                    "q",
+                    SearchToken.And,
+                    Phrase(["uick", "brow"]),
+                    SearchToken.And,
+                    "n",
+                ],
+            ),
+            (
+                '-"quick brown"',
+                [SearchToken.Not, Phrase(["quick", "brown"])],
+            ),
+        )
+
+        for query, expected in cases:
+            tokenized = _tokenize_query(query)
+            self.assertEqual(
+                tokenized, expected, f"{tokenized} != {expected} for {query}"
+            )
+
+    def _check_test_cases(
+        self,
+        store: DataStore,
+        cases: List[Tuple[str, Union[bool, Tuple[bool, bool]]]],
+        index=0,
+    ) -> None:
+        # Run all the test cases versus search_msgs
+        for query, expect_to_contain in cases:
+            if isinstance(expect_to_contain, tuple):
+                expect_to_contain = expect_to_contain[index]
+
+            result = self.get_success(
+                store.search_msgs([self.room_id], query, ["content.body"])
+            )
+            self.assertEquals(
+                result["count"],
+                1 if expect_to_contain else 0,
+                f"expected '{query}' to match '{self.PHRASE}'"
+                if expect_to_contain
+                else f"'{query}' unexpectedly matched '{self.PHRASE}'",
+            )
+            self.assertEquals(
+                len(result["results"]),
+                1 if expect_to_contain else 0,
+                "results array length should match count",
+            )
+
+        # Run them again versus search_rooms
+        for query, expect_to_contain in cases:
+            if isinstance(expect_to_contain, tuple):
+                expect_to_contain = expect_to_contain[index]
+
+            result = self.get_success(
+                store.search_rooms([self.room_id], query, ["content.body"], 10)
+            )
+            self.assertEquals(
+                result["count"],
+                1 if expect_to_contain else 0,
+                f"expected '{query}' to match '{self.PHRASE}'"
+                if expect_to_contain
+                else f"'{query}' unexpectedly matched '{self.PHRASE}'",
+            )
+            self.assertEquals(
+                len(result["results"]),
+                1 if expect_to_contain else 0,
+                "results array length should match count",
+            )
+
+    def test_postgres_web_search_for_phrase(self):
+        """
+        Test searching for phrases using typical web search syntax, as per postgres' websearch_to_tsquery.
+        This test is skipped unless the postgres instance supports websearch_to_tsquery.
+        """
+
+        store = self.hs.get_datastores().main
+        if not isinstance(store.database_engine, PostgresEngine):
+            raise SkipTest("Test only applies when postgres is used as the database")
+
+        if store.database_engine.tsquery_func != "websearch_to_tsquery":
+            raise SkipTest(
+                "Test only applies when postgres supporting websearch_to_tsquery is used as the database"
+            )
+
+        self._check_test_cases(store, self.COMMON_CASES + self.POSTGRES_CASES, index=0)
+
+    def test_postgres_non_web_search_for_phrase(self):
+        """
+        Test postgres searching for phrases without using web search, which is used when websearch_to_tsquery isn't
+        supported by the current postgres version.
+        """
+
+        store = self.hs.get_datastores().main
+        if not isinstance(store.database_engine, PostgresEngine):
+            raise SkipTest("Test only applies when postgres is used as the database")
+
+        # Patch tsquery_func to always return plainto_tsquery to ensure we're testing that code path.
+        with patch(
+            "synapse.storage.engines.postgres.PostgresEngine.tsquery_func",
+            new_callable=PropertyMock,
+        ) as supports_websearch_to_tsquery:
+            supports_websearch_to_tsquery.return_value = "plainto_tsquery"
+            self._check_test_cases(
+                store, self.COMMON_CASES + self.POSTGRES_CASES, index=1
+            )
+
+    def test_sqlite_search(self):
+        """
+        Test sqlite searching for phrases.
+        """
+        store = self.hs.get_datastores().main
+        if not isinstance(store.database_engine, Sqlite3Engine):
+            raise SkipTest("Test only applies when sqlite is used as the database")
+
+        self._check_test_cases(store, self.COMMON_CASES, index=0)

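For the SQLite path restored above, a free-text query is now tokenized and then
rendered as an FTS MATCH expression rather than being joined with "&". A quick
usage sketch, assuming the helpers behave exactly as defined in the hunk (the
example query itself is illustrative):

    from synapse.storage.databases.main.search import (
        Phrase,
        SearchToken,
        _tokenize_query,
        _tokens_to_sqlite_match_query,
    )

    tokens = _tokenize_query('fox -brown "jumps over"')
    assert tokens == [
        "fox", SearchToken.Not, "brown", SearchToken.And, Phrase(["jumps", "over"])
    ]

    # Renders to the enhanced-query-syntax string passed to MATCH by search_msgs:
    print(_tokens_to_sqlite_match_query(tokens))  # fox NOT brown AND "jumps over"

On PostgreSQL the user's string is instead passed through unchanged and handed to
websearch_to_tsquery (or plainto_tsquery before version 11), as selected by the
new tsquery_func property added in the same patch.
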
From e2008e4ef5235fafd4dea72c73472faa379c1e3f Mon Sep 17 00:00:00 2001
From: Sean Quah <seanq@matrix.org>
Date: Fri, 4 Nov 2022 11:38:39 +0000
Subject: [PATCH 177/278] Revert "Revert "Fix tests for change in PostgreSQL 14
 behavior change. (#14310)""

This reverts commit 7f77f1386c94c753755b79d7d7025e958b228282.
---
 synapse/storage/databases/main/search.py |  5 ++---
 tests/storage/test_room_search.py        | 16 ++++++++++++----
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py
index a89fc54c2cb..594b935614f 100644
--- a/synapse/storage/databases/main/search.py
+++ b/synapse/storage/databases/main/search.py
@@ -824,9 +824,8 @@ def _tokenize_query(query: str) -> TokenList:
     in_phrase = False
     parts = deque(query.split('"'))
     for i, part in enumerate(parts):
-        # The contents inside double quotes are treated as a phrase; a trailing
-        # double quote is not implied.
-        in_phrase = bool(i % 2) and i != (len(parts) - 1)
+        # The contents inside double quotes are treated as a phrase.
+        in_phrase = bool(i % 2)
 
         # Pull out the individual words, discarding any non-word characters.
         words = deque(re.findall(r"([\w\-]+)", part, re.UNICODE))
diff --git a/tests/storage/test_room_search.py b/tests/storage/test_room_search.py
index 9ddc19900af..868b5bee848 100644
--- a/tests/storage/test_room_search.py
+++ b/tests/storage/test_room_search.py
@@ -239,7 +239,6 @@ class MessageSearchTest(HomeserverTestCase):
         ("fox -nope", (True, False)),
         ("fox -brown", (False, True)),
         ('"fox" quick', True),
-        ('"fox quick', True),
         ('"quick brown', True),
         ('" quick "', True),
         ('" nope"', False),
@@ -269,6 +268,15 @@ def prepare(
         response = self.helper.send(self.room_id, self.PHRASE, tok=self.access_token)
         self.assertIn("event_id", response)
 
+        # The behaviour of a missing trailing double quote changed in PostgreSQL 14
+        # from ignoring the initial double quote to treating it as a phrase.
+        main_store = homeserver.get_datastores().main
+        found = False
+        if isinstance(main_store.database_engine, PostgresEngine):
+            assert main_store.database_engine._version is not None
+            found = main_store.database_engine._version < 140000
+        self.COMMON_CASES.append(('"fox quick', (found, True)))
+
     def test_tokenize_query(self) -> None:
         """Test the custom logic to tokenize a user's query."""
         cases = (
@@ -280,9 +288,9 @@ def test_tokenize_query(self) -> None:
             ("fox -brown", ["fox", SearchToken.Not, "brown"]),
             ("- fox", [SearchToken.Not, "fox"]),
             ('"fox" quick', [Phrase(["fox"]), SearchToken.And, "quick"]),
-            # No trailing double quoe.
-            ('"fox quick', ["fox", SearchToken.And, "quick"]),
-            ('"-fox quick', [SearchToken.Not, "fox", SearchToken.And, "quick"]),
+            # No trailing double quote.
+            ('"fox quick', [Phrase(["fox", "quick"])]),
+            ('"-fox quick', [Phrase(["-fox", "quick"])]),
             ('" quick "', [Phrase(["quick"])]),
             (
                 'q"uick brow"n',

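The behavioural change is easiest to see on the tokenizer itself. A minimal
check against the post-patch _tokenize_query from the hunk above; PostgreSQL 14's
websearch_to_tsquery treats an unclosed quote the same way, which is why the test
now derives its expectation from the server version:

    from synapse.storage.databases.main.search import Phrase, _tokenize_query

    # Before this patch, an unclosed double quote was effectively ignored:
    #   '"fox quick'  ->  ["fox", SearchToken.And, "quick"]
    # After it, everything following the quote is treated as a single phrase:
    assert _tokenize_query('"fox quick') == [Phrase(["fox", "quick"])]
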
From 02a25ce3f934451b34105a4fb7a6366e2f563b99 Mon Sep 17 00:00:00 2001
From: Sean Quah <seanq@matrix.org>
Date: Fri, 4 Nov 2022 11:38:46 +0000
Subject: [PATCH 178/278] Revert "Revert "Switch search SQL to triple-quote
 strings. (#14311)""

This reverts commit 220af1df54c3b4e5f1d7ec8fb62b375c99a2bbe2.
---
 synapse/storage/databases/main/search.py | 188 ++++++++++++-----------
 1 file changed, 99 insertions(+), 89 deletions(-)

diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py
index 594b935614f..e9588d17551 100644
--- a/synapse/storage/databases/main/search.py
+++ b/synapse/storage/databases/main/search.py
@@ -80,11 +80,11 @@ def store_search_entries_txn(
         if not self.hs.config.server.enable_search:
             return
         if isinstance(self.database_engine, PostgresEngine):
-            sql = (
-                "INSERT INTO event_search"
-                " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)"
-                " VALUES (?,?,?,to_tsvector('english', ?),?,?)"
-            )
+            sql = """
+            INSERT INTO event_search
+            (event_id, room_id, key, vector, stream_ordering, origin_server_ts)
+            VALUES (?,?,?,to_tsvector('english', ?),?,?)
+            """
 
             args1 = (
                 (
@@ -101,20 +101,20 @@ def store_search_entries_txn(
             txn.execute_batch(sql, args1)
 
         elif isinstance(self.database_engine, Sqlite3Engine):
-            sql = (
-                "INSERT INTO event_search (event_id, room_id, key, value)"
-                " VALUES (?,?,?,?)"
-            )
-            args2 = (
-                (
-                    entry.event_id,
-                    entry.room_id,
-                    entry.key,
-                    _clean_value_for_search(entry.value),
-                )
-                for entry in entries
+            self.db_pool.simple_insert_many_txn(
+                txn,
+                table="event_search",
+                keys=("event_id", "room_id", "key", "value"),
+                values=(
+                    (
+                        entry.event_id,
+                        entry.room_id,
+                        entry.key,
+                        _clean_value_for_search(entry.value),
+                    )
+                    for entry in entries
+                ),
             )
-            txn.execute_batch(sql, args2)
 
         else:
             # This should be unreachable.
@@ -162,15 +162,17 @@ async def _background_reindex_search(
         TYPES = ["m.room.name", "m.room.message", "m.room.topic"]
 
         def reindex_search_txn(txn: LoggingTransaction) -> int:
-            sql = (
-                "SELECT stream_ordering, event_id, room_id, type, json, "
-                " origin_server_ts FROM events"
-                " JOIN event_json USING (room_id, event_id)"
-                " WHERE ? <= stream_ordering AND stream_ordering < ?"
-                " AND (%s)"
-                " ORDER BY stream_ordering DESC"
-                " LIMIT ?"
-            ) % (" OR ".join("type = '%s'" % (t,) for t in TYPES),)
+            sql = """
+            SELECT stream_ordering, event_id, room_id, type, json, origin_server_ts
+            FROM events
+            JOIN event_json USING (room_id, event_id)
+            WHERE ? <= stream_ordering AND stream_ordering < ?
+            AND (%s)
+            ORDER BY stream_ordering DESC
+            LIMIT ?
+            """ % (
+                " OR ".join("type = '%s'" % (t,) for t in TYPES),
+            )
 
             txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size))
 
@@ -284,8 +286,10 @@ def create_index(conn: LoggingDatabaseConnection) -> None:
 
                 try:
                     c.execute(
-                        "CREATE INDEX CONCURRENTLY event_search_fts_idx"
-                        " ON event_search USING GIN (vector)"
+                        """
+                        CREATE INDEX CONCURRENTLY event_search_fts_idx
+                        ON event_search USING GIN (vector)
+                        """
                     )
                 except psycopg2.ProgrammingError as e:
                     logger.warning(
@@ -323,12 +327,16 @@ def create_index(conn: LoggingDatabaseConnection) -> None:
                 # We create with NULLS FIRST so that when we search *backwards*
                 # we get the ones with non null origin_server_ts *first*
                 c.execute(
-                    "CREATE INDEX CONCURRENTLY event_search_room_order ON event_search("
-                    "room_id, origin_server_ts NULLS FIRST, stream_ordering NULLS FIRST)"
+                    """
+                    CREATE INDEX CONCURRENTLY event_search_room_order
+                    ON event_search(room_id, origin_server_ts NULLS FIRST, stream_ordering NULLS FIRST)
+                    """
                 )
                 c.execute(
-                    "CREATE INDEX CONCURRENTLY event_search_order ON event_search("
-                    "origin_server_ts NULLS FIRST, stream_ordering NULLS FIRST)"
+                    """
+                    CREATE INDEX CONCURRENTLY event_search_order
+                    ON event_search(origin_server_ts NULLS FIRST, stream_ordering NULLS FIRST)
+                    """
                 )
                 conn.set_session(autocommit=False)
 
@@ -345,14 +353,14 @@ def create_index(conn: LoggingDatabaseConnection) -> None:
             )
 
         def reindex_search_txn(txn: LoggingTransaction) -> Tuple[int, bool]:
-            sql = (
-                "UPDATE event_search AS es SET stream_ordering = e.stream_ordering,"
-                " origin_server_ts = e.origin_server_ts"
-                " FROM events AS e"
-                " WHERE e.event_id = es.event_id"
-                " AND ? <= e.stream_ordering AND e.stream_ordering < ?"
-                " RETURNING es.stream_ordering"
-            )
+            sql = """
+            UPDATE event_search AS es
+            SET stream_ordering = e.stream_ordering, origin_server_ts = e.origin_server_ts
+            FROM events AS e
+            WHERE e.event_id = es.event_id
+            AND ? <= e.stream_ordering AND e.stream_ordering < ?
+            RETURNING es.stream_ordering
+            """
 
             min_stream_id = max_stream_id - batch_size
             txn.execute(sql, (min_stream_id, max_stream_id))
@@ -456,33 +464,33 @@ async def search_msgs(
         if isinstance(self.database_engine, PostgresEngine):
             search_query = search_term
             tsquery_func = self.database_engine.tsquery_func
-            sql = (
-                f"SELECT ts_rank_cd(vector, {tsquery_func}('english', ?)) AS rank,"
-                " room_id, event_id"
-                " FROM event_search"
-                f" WHERE vector @@  {tsquery_func}('english', ?)"
-            )
+            sql = f"""
+            SELECT ts_rank_cd(vector, {tsquery_func}('english', ?)) AS rank,
+            room_id, event_id
+            FROM event_search
+            WHERE vector @@  {tsquery_func}('english', ?)
+            """
             args = [search_query, search_query] + args
 
-            count_sql = (
-                "SELECT room_id, count(*) as count FROM event_search"
-                f" WHERE vector @@ {tsquery_func}('english', ?)"
-            )
+            count_sql = f"""
+            SELECT room_id, count(*) as count FROM event_search
+            WHERE vector @@ {tsquery_func}('english', ?)
+            """
             count_args = [search_query] + count_args
         elif isinstance(self.database_engine, Sqlite3Engine):
             search_query = _parse_query_for_sqlite(search_term)
 
-            sql = (
-                "SELECT rank(matchinfo(event_search)) as rank, room_id, event_id"
-                " FROM event_search"
-                " WHERE value MATCH ?"
-            )
+            sql = """
+            SELECT rank(matchinfo(event_search)) as rank, room_id, event_id
+            FROM event_search
+            WHERE value MATCH ?
+            """
             args = [search_query] + args
 
-            count_sql = (
-                "SELECT room_id, count(*) as count FROM event_search"
-                " WHERE value MATCH ?"
-            )
+            count_sql = """
+            SELECT room_id, count(*) as count FROM event_search
+            WHERE value MATCH ?
+            """
             count_args = [search_query] + count_args
         else:
             # This should be unreachable.
@@ -588,26 +596,27 @@ async def search_rooms(
                 raise SynapseError(400, "Invalid pagination token")
 
             clauses.append(
-                "(origin_server_ts < ?"
-                " OR (origin_server_ts = ? AND stream_ordering < ?))"
+                """
+                (origin_server_ts < ? OR (origin_server_ts = ? AND stream_ordering < ?))
+                """
             )
             args.extend([origin_server_ts, origin_server_ts, stream])
 
         if isinstance(self.database_engine, PostgresEngine):
             search_query = search_term
             tsquery_func = self.database_engine.tsquery_func
-            sql = (
-                f"SELECT ts_rank_cd(vector, {tsquery_func}('english', ?)) as rank,"
-                " origin_server_ts, stream_ordering, room_id, event_id"
-                " FROM event_search"
-                f" WHERE vector @@ {tsquery_func}('english', ?) AND "
-            )
+            sql = f"""
+            SELECT ts_rank_cd(vector, {tsquery_func}('english', ?)) as rank,
+            origin_server_ts, stream_ordering, room_id, event_id
+            FROM event_search
+            WHERE vector @@ {tsquery_func}('english', ?) AND
+            """
             args = [search_query, search_query] + args
 
-            count_sql = (
-                "SELECT room_id, count(*) as count FROM event_search"
-                f" WHERE vector @@ {tsquery_func}('english', ?) AND "
-            )
+            count_sql = f"""
+            SELECT room_id, count(*) as count FROM event_search
+            WHERE vector @@ {tsquery_func}('english', ?) AND
+            """
             count_args = [search_query] + count_args
         elif isinstance(self.database_engine, Sqlite3Engine):
 
@@ -619,23 +628,24 @@ async def search_rooms(
             # in the events table to get the topological ordering. We need
             # to use the indexes in this order because sqlite refuses to
             # MATCH unless it uses the full text search index
-            sql = (
-                "SELECT rank(matchinfo) as rank, room_id, event_id,"
-                " origin_server_ts, stream_ordering"
-                " FROM (SELECT key, event_id, matchinfo(event_search) as matchinfo"
-                " FROM event_search"
-                " WHERE value MATCH ?"
-                " )"
-                " CROSS JOIN events USING (event_id)"
-                " WHERE "
+            sql = """
+            SELECT
+                rank(matchinfo) as rank, room_id, event_id, origin_server_ts, stream_ordering
+            FROM (
+                SELECT key, event_id, matchinfo(event_search) as matchinfo
+                FROM event_search
+                WHERE value MATCH ?
             )
+            CROSS JOIN events USING (event_id)
+            WHERE
+            """
             search_query = _parse_query_for_sqlite(search_term)
             args = [search_query] + args
 
-            count_sql = (
-                "SELECT room_id, count(*) as count FROM event_search"
-                " WHERE value MATCH ? AND "
-            )
+            count_sql = """
+            SELECT room_id, count(*) as count FROM event_search
+            WHERE value MATCH ? AND
+            """
             count_args = [search_query] + count_args
         else:
             # This should be unreachable.
@@ -647,10 +657,10 @@ async def search_rooms(
         # We add an arbitrary limit here to ensure we don't try to pull the
         # entire table from the database.
         if isinstance(self.database_engine, PostgresEngine):
-            sql += (
-                " ORDER BY origin_server_ts DESC NULLS LAST,"
-                " stream_ordering DESC NULLS LAST LIMIT ?"
-            )
+            sql += """
+            ORDER BY origin_server_ts DESC NULLS LAST, stream_ordering DESC NULLS LAST
+            LIMIT ?
+            """
         elif isinstance(self.database_engine, Sqlite3Engine):
             sql += " ORDER BY origin_server_ts DESC, stream_ordering DESC LIMIT ?"
         else:

From 779d48f4de9cf0728ec8a3e99edda8d125a30eff Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Sat, 5 Nov 2022 18:21:07 +0000
Subject: [PATCH 179/278] Fix background update table-scanning `events`

When this background update did its last batch, it would try to update all the
events that had been inserted since the background update started, which could
cause a table scan. Make sure we limit the update correctly.
---
 changelog.d/14374.bugfix                         |  1 +
 .../storage/databases/main/events_bg_updates.py  | 16 ++++++++--------
 2 files changed, 9 insertions(+), 8 deletions(-)
 create mode 100644 changelog.d/14374.bugfix

diff --git a/changelog.d/14374.bugfix b/changelog.d/14374.bugfix
new file mode 100644
index 00000000000..8226f5b6511
--- /dev/null
+++ b/changelog.d/14374.bugfix
@@ -0,0 +1 @@
+Fix a background database update which could cause poor database performance.
diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py
index 6e8aeed7b4b..9e31798ab19 100644
--- a/synapse/storage/databases/main/events_bg_updates.py
+++ b/synapse/storage/databases/main/events_bg_updates.py
@@ -1435,16 +1435,16 @@ def _populate_txn(txn: LoggingTransaction) -> bool:
                 ),
             )
 
-            endpoint = None
             row = txn.fetchone()
             if row:
                 endpoint = row[0]
+            else:
+                # if the query didn't return a row, we must be almost done. We just
+                # need to go up to the recorded max_stream_ordering.
+                endpoint = max_stream_ordering_inclusive
 
-            where_clause = "stream_ordering > ?"
-            args = [min_stream_ordering_exclusive]
-            if endpoint:
-                where_clause += " AND stream_ordering <= ?"
-                args.append(endpoint)
+            where_clause = "stream_ordering > ? AND stream_ordering <= ?"
+            args = [min_stream_ordering_exclusive, endpoint]
 
             # now do the updates.
             txn.execute(
@@ -1458,13 +1458,13 @@ def _populate_txn(txn: LoggingTransaction) -> bool:
             )
 
             logger.info(
-                "populated new `events` columns up to %s/%i: updated %i rows",
+                "populated new `events` columns up to %i/%i: updated %i rows",
                 endpoint,
                 max_stream_ordering_inclusive,
                 txn.rowcount,
             )
 
-            if endpoint is None:
+            if endpoint >= max_stream_ordering_inclusive:
                 # we're done
                 return True
 

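A condensed sketch of the fix above: the batch's WHERE clause is now always
bounded on both sides, falling back to the recorded maximum stream ordering when
the endpoint query returns no row, instead of issuing an open-ended
"stream_ordering > ?" that could scan the whole events table. The function name
and return shape are illustrative, not the real signature:

    from typing import List, Optional, Tuple

    def bounded_batch_clause(
        min_stream_ordering_exclusive: int,
        endpoint: Optional[int],
        max_stream_ordering_inclusive: int,
    ) -> Tuple[str, List[int], bool]:
        # If the endpoint query returned no row we are nearly done: finish the
        # batch up to the recorded maximum instead of leaving the range open.
        if endpoint is None:
            endpoint = max_stream_ordering_inclusive

        where_clause = "stream_ordering > ? AND stream_ordering <= ?"
        args = [min_stream_ordering_exclusive, endpoint]

        # The update is complete once the endpoint reaches the recorded maximum.
        return where_clause, args, endpoint >= max_stream_ordering_inclusive
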
From 5436c3e0adac4aa314f72e8663ca0f6b5ab07e3f Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 22 Nov 2022 16:50:48 +0000
Subject: [PATCH 180/278] Ignore wildcard to-device messages for users with
 large numbers of devices

---
 synapse/storage/databases/main/deviceinbox.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py
index 73c95ffb6f2..a2da1d808a8 100644
--- a/synapse/storage/databases/main/deviceinbox.py
+++ b/synapse/storage/databases/main/deviceinbox.py
@@ -777,6 +777,10 @@ def _add_messages_to_local_device_inbox_txn(
                     retcol="device_id",
                 )
 
+                if len(devices) > 1000:
+                    logger.warning("ignoring wildcard to-device messages to %i devices", len(devices))
+                    continue
+
                 message_json = json_encoder.encode(messages_by_device["*"])
                 for device_id in devices:
                     # Add the message for all devices for this user on this

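A trimmed illustration of the guard added above: wildcard ("*") to-device
messages are dropped rather than fanned out once a user has more than 1000
devices. The threshold comes from the hunk; the helper name and constant are
illustrative only:

    import logging
    from typing import Collection

    logger = logging.getLogger(__name__)

    MAX_WILDCARD_DEVICES = 1000

    def should_fan_out_wildcard_message(device_ids: Collection[str]) -> bool:
        # Mirrors the check in _add_messages_to_local_device_inbox_txn: skip the
        # per-device fan-out entirely for users with huge device lists.
        if len(device_ids) > MAX_WILDCARD_DEVICES:
            logger.warning(
                "ignoring wildcard to-device messages to %i devices", len(device_ids)
            )
            return False
        return True
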
From 694a3fd5c2c3844f2dbfea70bda0b3c9264e33a1 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <andrewm@element.io>
Date: Fri, 9 Dec 2022 16:36:15 +0000
Subject: [PATCH 181/278] Revert "Delete stale non-e2e devices for users, take
 2 (#14595)"

This reverts commit c2de2ca63060324cf2f80ddf3289b0fd7a4d861b.
---
 changelog.d/14595.misc                    |  1 -
 synapse/handlers/device.py                | 31 +--------
 synapse/storage/databases/main/devices.py | 79 +----------------------
 tests/handlers/test_device.py             |  2 +-
 tests/storage/test_client_ips.py          |  4 +-
 5 files changed, 4 insertions(+), 113 deletions(-)
 delete mode 100644 changelog.d/14595.misc

diff --git a/changelog.d/14595.misc b/changelog.d/14595.misc
deleted file mode 100644
index f9bfc581ad5..00000000000
--- a/changelog.d/14595.misc
+++ /dev/null
@@ -1 +0,0 @@
-Prune user's old devices on login if they have too many.
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 7674c187ef3..d4750a32e64 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -52,7 +52,6 @@
 from synapse.util.async_helpers import Linearizer
 from synapse.util.caches.expiringcache import ExpiringCache
 from synapse.util.cancellation import cancellable
-from synapse.util.iterutils import batch_iter
 from synapse.util.metrics import measure_func
 from synapse.util.retryutils import NotRetryingDestination
 
@@ -422,9 +421,6 @@ async def check_device_registered(
 
         self._check_device_name_length(initial_device_display_name)
 
-        # Prune the user's device list if they already have a lot of devices.
-        await self._prune_too_many_devices(user_id)
-
         if device_id is not None:
             new_device = await self.store.store_device(
                 user_id=user_id,
@@ -456,31 +452,6 @@ async def check_device_registered(
 
         raise errors.StoreError(500, "Couldn't generate a device ID.")
 
-    async def _prune_too_many_devices(self, user_id: str) -> None:
-        """Delete any excess old devices this user may have."""
-        device_ids = await self.store.check_too_many_devices_for_user(user_id)
-        if not device_ids:
-            return
-
-        # We don't want to block and try and delete tonnes of devices at once,
-        # so we cap the number of devices we delete synchronously.
-        first_batch, remaining_device_ids = device_ids[:10], device_ids[10:]
-        await self.delete_devices(user_id, first_batch)
-
-        if not remaining_device_ids:
-            return
-
-        # Now spawn a background loop that deletes the rest.
-        async def _prune_too_many_devices_loop() -> None:
-            for batch in batch_iter(remaining_device_ids, 10):
-                await self.delete_devices(user_id, batch)
-
-                await self.clock.sleep(1)
-
-        run_as_background_process(
-            "_prune_too_many_devices_loop", _prune_too_many_devices_loop
-        )
-
     async def _delete_stale_devices(self) -> None:
         """Background task that deletes devices which haven't been accessed for more than
         a configured time period.
@@ -510,7 +481,7 @@ async def delete_all_devices_for_user(
             device_ids = [d for d in device_ids if d != except_device_id]
         await self.delete_devices(user_id, device_ids)
 
-    async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> None:
+    async def delete_devices(self, user_id: str, device_ids: List[str]) -> None:
         """Delete several devices
 
         Args:
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 08ccd46a2bf..a5bb4d404e2 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -1569,72 +1569,6 @@ def _txn(txn: LoggingTransaction) -> int:
 
         return rows
 
-    async def check_too_many_devices_for_user(self, user_id: str) -> List[str]:
-        """Check if the user has a lot of devices, and if so return the set of
-        devices we can prune.
-
-        This does *not* return hidden devices or devices with E2E keys.
-        """
-
-        num_devices = await self.db_pool.simple_select_one_onecol(
-            table="devices",
-            keyvalues={"user_id": user_id, "hidden": False},
-            retcol="COALESCE(COUNT(*), 0)",
-            desc="count_devices",
-        )
-
-        # We let users have up to ten devices without pruning.
-        if num_devices <= 10:
-            return []
-
-        # We prune everything older than N days.
-        max_last_seen = self._clock.time_msec() - 14 * 24 * 60 * 60 * 1000
-
-        if num_devices > 50:
-            # If the user has more than 50 devices, then we chose a last seen
-            # that ensures we keep at most 50 devices.
-            sql = """
-                SELECT last_seen FROM devices
-                LEFT JOIN e2e_device_keys_json USING (user_id, device_id)
-                WHERE
-                    user_id = ?
-                    AND NOT hidden
-                    AND last_seen IS NOT NULL
-                    AND key_json IS NULL
-                ORDER BY last_seen DESC
-                LIMIT 1
-                OFFSET 50
-            """
-
-            rows = await self.db_pool.execute(
-                "check_too_many_devices_for_user_last_seen", None, sql, (user_id,)
-            )
-            if rows:
-                max_last_seen = max(rows[0][0], max_last_seen)
-
-        # Now fetch the devices to delete.
-        sql = """
-            SELECT DISTINCT device_id FROM devices
-            LEFT JOIN e2e_device_keys_json USING (user_id, device_id)
-            WHERE
-                user_id = ?
-                AND NOT hidden
-                AND last_seen < ?
-                AND key_json IS NULL
-            ORDER BY last_seen
-        """
-
-        def check_too_many_devices_for_user_txn(
-            txn: LoggingTransaction,
-        ) -> List[str]:
-            txn.execute(sql, (user_id, max_last_seen))
-            return [device_id for device_id, in txn]
-
-        return await self.db_pool.runInteraction(
-            "check_too_many_devices_for_user",
-            check_too_many_devices_for_user_txn,
-        )
-
 
 class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
     # Because we have write access, this will be a StreamIdGenerator
@@ -1693,7 +1627,6 @@ async def store_device(
                 values={},
                 insertion_values={
                     "display_name": initial_device_display_name,
-                    "last_seen": self._clock.time_msec(),
                     "hidden": False,
                 },
                 desc="store_device",
@@ -1739,15 +1672,7 @@ async def store_device(
             )
             raise StoreError(500, "Problem storing device.")
 
-    @cached(max_entries=0)
-    async def delete_device(self, user_id: str, device_id: str) -> None:
-        raise NotImplementedError()
-
-    # Note: sometimes deleting rows out of `device_inbox` can take a long time,
-    # so we use a cache so that we deduplicate in flight requests to delete
-    # devices.
-    @cachedList(cached_method_name="delete_device", list_name="device_ids")
-    async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> dict:
+    async def delete_devices(self, user_id: str, device_ids: List[str]) -> None:
         """Deletes several devices.
 
         Args:
@@ -1784,8 +1709,6 @@ def _delete_devices_txn(txn: LoggingTransaction) -> None:
         for device_id in device_ids:
             self.device_id_exists_cache.invalidate((user_id, device_id))
 
-        return {}
-
     async def update_device(
         self, user_id: str, device_id: str, new_display_name: Optional[str] = None
     ) -> None:
diff --git a/tests/handlers/test_device.py b/tests/handlers/test_device.py
index a456bffd632..ce7525e29c0 100644
--- a/tests/handlers/test_device.py
+++ b/tests/handlers/test_device.py
@@ -115,7 +115,7 @@ def test_get_devices_by_user(self) -> None:
                 "device_id": "xyz",
                 "display_name": "display 0",
                 "last_seen_ip": None,
-                "last_seen_ts": 1000000,
+                "last_seen_ts": None,
             },
             device_map["xyz"],
         )
diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py
index a9af1babedf..49ad3c13242 100644
--- a/tests/storage/test_client_ips.py
+++ b/tests/storage/test_client_ips.py
@@ -169,8 +169,6 @@ def test_get_last_client_ip_by_device(self, after_persisting: bool):
             )
         )
 
-        last_seen = self.clock.time_msec()
-
         if after_persisting:
             # Trigger the storage loop
             self.reactor.advance(10)
@@ -191,7 +189,7 @@ def test_get_last_client_ip_by_device(self, after_persisting: bool):
                         "device_id": device_id,
                         "ip": None,
                         "user_agent": None,
-                        "last_seen": last_seen,
+                        "last_seen": None,
                     },
                 ],
             )

From 6cbd5ed0c2d85886d429dbf71bd6b630fe324f1b Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 9 Dec 2022 17:06:34 +0000
Subject: [PATCH 182/278] Revert "Revert "Delete stale non-e2e devices for
 users, take 2 (#14595)""

This reverts commit 694a3fd5c2c3844f2dbfea70bda0b3c9264e33a1.
---
 changelog.d/14595.misc                    |  1 +
 synapse/handlers/device.py                | 31 ++++++++-
 synapse/storage/databases/main/devices.py | 79 ++++++++++++++++++++++-
 tests/handlers/test_device.py             |  2 +-
 tests/storage/test_client_ips.py          |  4 +-
 5 files changed, 113 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/14595.misc

diff --git a/changelog.d/14595.misc b/changelog.d/14595.misc
new file mode 100644
index 00000000000..f9bfc581ad5
--- /dev/null
+++ b/changelog.d/14595.misc
@@ -0,0 +1 @@
+Prune user's old devices on login if they have too many.
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index d4750a32e64..7674c187ef3 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -52,6 +52,7 @@
 from synapse.util.async_helpers import Linearizer
 from synapse.util.caches.expiringcache import ExpiringCache
 from synapse.util.cancellation import cancellable
+from synapse.util.iterutils import batch_iter
 from synapse.util.metrics import measure_func
 from synapse.util.retryutils import NotRetryingDestination
 
@@ -421,6 +422,9 @@ async def check_device_registered(
 
         self._check_device_name_length(initial_device_display_name)
 
+        # Prune the user's device list if they already have a lot of devices.
+        await self._prune_too_many_devices(user_id)
+
         if device_id is not None:
             new_device = await self.store.store_device(
                 user_id=user_id,
@@ -452,6 +456,31 @@ async def check_device_registered(
 
         raise errors.StoreError(500, "Couldn't generate a device ID.")
 
+    async def _prune_too_many_devices(self, user_id: str) -> None:
+        """Delete any excess old devices this user may have."""
+        device_ids = await self.store.check_too_many_devices_for_user(user_id)
+        if not device_ids:
+            return
+
+        # We don't want to block and try and delete tonnes of devices at once,
+        # so we cap the number of devices we delete synchronously.
+        first_batch, remaining_device_ids = device_ids[:10], device_ids[10:]
+        await self.delete_devices(user_id, first_batch)
+
+        if not remaining_device_ids:
+            return
+
+        # Now spawn a background loop that deletes the rest.
+        async def _prune_too_many_devices_loop() -> None:
+            for batch in batch_iter(remaining_device_ids, 10):
+                await self.delete_devices(user_id, batch)
+
+                await self.clock.sleep(1)
+
+        run_as_background_process(
+            "_prune_too_many_devices_loop", _prune_too_many_devices_loop
+        )
+
     async def _delete_stale_devices(self) -> None:
         """Background task that deletes devices which haven't been accessed for more than
         a configured time period.
@@ -481,7 +510,7 @@ async def delete_all_devices_for_user(
             device_ids = [d for d in device_ids if d != except_device_id]
         await self.delete_devices(user_id, device_ids)
 
-    async def delete_devices(self, user_id: str, device_ids: List[str]) -> None:
+    async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> None:
         """Delete several devices
 
         Args:
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index a5bb4d404e2..08ccd46a2bf 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -1569,6 +1569,72 @@ def _txn(txn: LoggingTransaction) -> int:
 
         return rows
 
+    async def check_too_many_devices_for_user(self, user_id: str) -> List[str]:
+        """Check if the user has a lot of devices, and if so return the set of
+        devices we can prune.
+
+        This does *not* return hidden devices or devices with E2E keys.
+        """
+
+        num_devices = await self.db_pool.simple_select_one_onecol(
+            table="devices",
+            keyvalues={"user_id": user_id, "hidden": False},
+            retcol="COALESCE(COUNT(*), 0)",
+            desc="count_devices",
+        )
+
+        # We let users have up to ten devices without pruning.
+        if num_devices <= 10:
+            return []
+
+        # We prune everything older than N days.
+        max_last_seen = self._clock.time_msec() - 14 * 24 * 60 * 60 * 1000
+
+        if num_devices > 50:
+            # If the user has more than 50 devices, then we chose a last seen
+            # that ensures we keep at most 50 devices.
+            sql = """
+                SELECT last_seen FROM devices
+                LEFT JOIN e2e_device_keys_json USING (user_id, device_id)
+                WHERE
+                    user_id = ?
+                    AND NOT hidden
+                    AND last_seen IS NOT NULL
+                    AND key_json IS NULL
+                ORDER BY last_seen DESC
+                LIMIT 1
+                OFFSET 50
+            """
+
+            rows = await self.db_pool.execute(
+                "check_too_many_devices_for_user_last_seen", None, sql, (user_id,)
+            )
+            if rows:
+                max_last_seen = max(rows[0][0], max_last_seen)
+
+        # Now fetch the devices to delete.
+        sql = """
+            SELECT DISTINCT device_id FROM devices
+            LEFT JOIN e2e_device_keys_json USING (user_id, device_id)
+            WHERE
+                user_id = ?
+                AND NOT hidden
+                AND last_seen < ?
+                AND key_json IS NULL
+            ORDER BY last_seen
+        """
+
+        def check_too_many_devices_for_user_txn(
+            txn: LoggingTransaction,
+        ) -> List[str]:
+            txn.execute(sql, (user_id, max_last_seen))
+            return [device_id for device_id, in txn]
+
+        return await self.db_pool.runInteraction(
+            "check_too_many_devices_for_user",
+            check_too_many_devices_for_user_txn,
+        )
+
 
 class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
     # Because we have write access, this will be a StreamIdGenerator
@@ -1627,6 +1693,7 @@ async def store_device(
                 values={},
                 insertion_values={
                     "display_name": initial_device_display_name,
+                    "last_seen": self._clock.time_msec(),
                     "hidden": False,
                 },
                 desc="store_device",
@@ -1672,7 +1739,15 @@ async def store_device(
             )
             raise StoreError(500, "Problem storing device.")
 
-    async def delete_devices(self, user_id: str, device_ids: List[str]) -> None:
+    @cached(max_entries=0)
+    async def delete_device(self, user_id: str, device_id: str) -> None:
+        raise NotImplementedError()
+
+    # Note: sometimes deleting rows out of `device_inbox` can take a long time,
+    # so we use a cache so that we deduplicate in flight requests to delete
+    # devices.
+    @cachedList(cached_method_name="delete_device", list_name="device_ids")
+    async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> dict:
         """Deletes several devices.
 
         Args:
@@ -1709,6 +1784,8 @@ def _delete_devices_txn(txn: LoggingTransaction) -> None:
         for device_id in device_ids:
             self.device_id_exists_cache.invalidate((user_id, device_id))
 
+        return {}
+
     async def update_device(
         self, user_id: str, device_id: str, new_display_name: Optional[str] = None
     ) -> None:
diff --git a/tests/handlers/test_device.py b/tests/handlers/test_device.py
index ce7525e29c0..a456bffd632 100644
--- a/tests/handlers/test_device.py
+++ b/tests/handlers/test_device.py
@@ -115,7 +115,7 @@ def test_get_devices_by_user(self) -> None:
                 "device_id": "xyz",
                 "display_name": "display 0",
                 "last_seen_ip": None,
-                "last_seen_ts": None,
+                "last_seen_ts": 1000000,
             },
             device_map["xyz"],
         )
diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py
index 49ad3c13242..a9af1babedf 100644
--- a/tests/storage/test_client_ips.py
+++ b/tests/storage/test_client_ips.py
@@ -169,6 +169,8 @@ def test_get_last_client_ip_by_device(self, after_persisting: bool):
             )
         )
 
+        last_seen = self.clock.time_msec()
+
         if after_persisting:
             # Trigger the storage loop
             self.reactor.advance(10)
@@ -189,7 +191,7 @@ def test_get_last_client_ip_by_device(self, after_persisting: bool):
                         "device_id": device_id,
                         "ip": None,
                         "user_agent": None,
-                        "last_seen": None,
+                        "last_seen": last_seen,
                     },
                 ],
             )

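Patch 182 reinstates the pruning flow: up to ten devices are deleted inline during registration, and any remainder is handed to a background loop that works in batches of ten with a one-second pause between batches. A standalone sketch of that shape, using asyncio stand-ins for Synapse's clock and background-process helpers (names here are illustrative):

    import asyncio
    import itertools
    from typing import Awaitable, Callable, Iterable, Iterator, List, Tuple, TypeVar

    T = TypeVar("T")

    def batch_iter(iterable: Iterable[T], size: int) -> Iterator[Tuple[T, ...]]:
        # Yield tuples of at most `size` items, same idea as synapse.util.iterutils.batch_iter.
        it = iter(iterable)
        return iter(lambda: tuple(itertools.islice(it, size)), ())

    async def prune_devices(
        device_ids: List[str],
        delete_devices: Callable[[List[str]], Awaitable[None]],
    ) -> None:
        # Delete a small batch inline so the caller (login) is not blocked for long.
        first_batch, remaining = device_ids[:10], device_ids[10:]
        await delete_devices(first_batch)
        if not remaining:
            return

        async def _loop() -> None:
            for batch in batch_iter(remaining, 10):
                await delete_devices(list(batch))
                await asyncio.sleep(1)  # spread the deletes out over time

        # Fire-and-forget, analogous to run_as_background_process in the patch.
        asyncio.create_task(_loop())
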
From 2654d61114fc4fccf589888ca63e100179e483b9 Mon Sep 17 00:00:00 2001
From: "Olivier Wilkinson (reivilibre)" <oliverw@matrix.org>
Date: Mon, 19 Dec 2022 17:50:57 +0000
Subject: [PATCH 183/278] Build a set of who we are interested in first and
 foremost

---
 synapse/handlers/e2e_keys.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py
index 5fe102e2f2f..94de0c0377b 100644
--- a/synapse/handlers/e2e_keys.py
+++ b/synapse/handlers/e2e_keys.py
@@ -300,17 +300,17 @@ async def _query_devices_for_destination(
         # queries. We use the more efficient batched query_client_keys for all
         # remaining users
         user_ids_updated = []
-        for (user_id, device_list) in destination_query.items():
-            if user_id in user_ids_updated:
-                continue
-
-            if device_list:
-                continue
 
-            room_ids = await self.store.get_rooms_for_user(user_id)
-            if not room_ids:
-                continue
+        # Perform a user device resync for each user only once and only as long as:
+        # - they have an empty device_list
+        # - they are in some rooms that this server can see
+        users_to_resync_devices = {
+            user_id
+            for (user_id, device_list) in destination_query.items()
+            if (not device_list) and (await self.store.get_rooms_for_user(user_id))
+        }
 
+        for user_id in users_to_resync_devices:
             # We've decided we're sharing a room with this user and should
             # probably be tracking their device lists. However, we haven't
             # done an initial sync on the device list so we do it now.

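The rewrite above collects the users needing a resync into a set up front instead of filtering with `continue` inside the loop; the `await` inside the comprehension is an asynchronous comprehension (PEP 530), which is valid because the enclosing function is async. A hedged sketch with stand-in names:

    from typing import Awaitable, Callable, Collection, Dict, Iterable, Set

    async def users_needing_resync(
        destination_query: Dict[str, Iterable[str]],
        get_rooms_for_user: Callable[[str], Awaitable[Collection[str]]],
    ) -> Set[str]:
        return {
            user_id
            for user_id, device_list in destination_query.items()
            # only users whose query has an empty device_list and who share a room with us
            if (not device_list) and (await get_rooms_for_user(user_id))
        }
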
From 9c4562c74a100676de32e99197c9508304c22ebe Mon Sep 17 00:00:00 2001
From: "Olivier Wilkinson (reivilibre)" <oliverw@matrix.org>
Date: Mon, 19 Dec 2022 17:53:05 +0000
Subject: [PATCH 184/278] Add log lines

---
 synapse/handlers/e2e_keys.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py
index 94de0c0377b..003f147ca6a 100644
--- a/synapse/handlers/e2e_keys.py
+++ b/synapse/handlers/e2e_keys.py
@@ -238,6 +238,9 @@ async def query_devices(
             # Now fetch any devices that we don't have in our cache
             # TODO It might make sense to propagate cancellations into the
             #      deferreds which are querying remote homeservers.
+            logger.debug(
+                "%d destinations to query devices for", len(remote_queries_not_in_cache)
+            )
             await make_deferred_yieldable(
                 delay_cancellation(
                     defer.gatherResults(
@@ -310,6 +313,12 @@ async def _query_devices_for_destination(
             if (not device_list) and (await self.store.get_rooms_for_user(user_id))
         }
 
+        logger.debug(
+            "%d users to resync devices for from destination %s",
+            len(users_to_resync_devices),
+            destination,
+        )
+
         for user_id in users_to_resync_devices:
             # We've decided we're sharing a room with this user and should
             # probably be tracking their device lists. However, we haven't

From 227c953d99c722ad1c51334a3b43b8d464a792b6 Mon Sep 17 00:00:00 2001
From: "Olivier Wilkinson (reivilibre)" <oliverw@matrix.org>
Date: Tue, 20 Dec 2022 11:49:39 +0000
Subject: [PATCH 185/278] Add async helpers

---
 synapse/util/async_helpers.py | 55 ++++++++++++++++++++++++++++++++---
 1 file changed, 51 insertions(+), 4 deletions(-)

diff --git a/synapse/util/async_helpers.py b/synapse/util/async_helpers.py
index d24c4f68c4d..01e3cd46f65 100644
--- a/synapse/util/async_helpers.py
+++ b/synapse/util/async_helpers.py
@@ -205,7 +205,10 @@ def __repr__(self) -> str:
 
 
 async def concurrently_execute(
-    func: Callable[[T], Any], args: Iterable[T], limit: int
+    func: Callable[[T], Any],
+    args: Iterable[T],
+    limit: int,
+    delay_cancellation: bool = False,
 ) -> None:
     """Executes the function with each argument concurrently while limiting
     the number of concurrent executions.
@@ -215,6 +218,8 @@ async def concurrently_execute(
         args: List of arguments to pass to func, each invocation of func
             gets a single argument.
         limit: Maximum number of concurrent executions.
+        delay_cancellation: Whether to delay cancellation until after the invocations
+            have finished.
 
     Returns:
         None, when all function invocations have finished. The return values
@@ -233,9 +238,16 @@ async def _concurrently_execute_inner(value: T) -> None:
     # We use `itertools.islice` to handle the case where the number of args is
     # less than the limit, avoiding needlessly spawning unnecessary background
     # tasks.
-    await yieldable_gather_results(
-        _concurrently_execute_inner, (value for value in itertools.islice(it, limit))
-    )
+    if delay_cancellation:
+        await yieldable_gather_results_delaying_cancellation(
+            _concurrently_execute_inner,
+            (value for value in itertools.islice(it, limit)),
+        )
+    else:
+        await yieldable_gather_results(
+            _concurrently_execute_inner,
+            (value for value in itertools.islice(it, limit)),
+        )
 
 
 P = ParamSpec("P")
@@ -292,6 +304,41 @@ async def yieldable_gather_results(
         raise dfe.subFailure.value from None
 
 
+async def yieldable_gather_results_delaying_cancellation(
+    func: Callable[Concatenate[T, P], Awaitable[R]],
+    iter: Iterable[T],
+    *args: P.args,
+    **kwargs: P.kwargs,
+) -> List[R]:
+    """Executes the function with each argument concurrently.
+    Cancellation is delayed until after all the results have been gathered.
+
+    See `yieldable_gather_results`.
+
+    Args:
+        func: Function to execute that returns a Deferred
+        iter: An iterable that yields items that get passed as the first
+            argument to the function
+        *args: Arguments to be passed to each call to func
+        **kwargs: Keyword arguments to be passed to each call to func
+
+    Returns
+        A list containing the results of the function
+    """
+    try:
+        return await make_deferred_yieldable(
+            delay_cancellation(
+                defer.gatherResults(
+                    [run_in_background(func, item, *args, **kwargs) for item in iter],  # type: ignore[arg-type]
+                    consumeErrors=True,
+                )
+            )
+        )
+    except defer.FirstError as dfe:
+        assert isinstance(dfe.subFailure.value, BaseException)
+        raise dfe.subFailure.value from None
+
+
 T1 = TypeVar("T1")
 T2 = TypeVar("T2")
 T3 = TypeVar("T3")

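Usage-wise, the new delay_cancellation flag lets a caller fan out with bounded concurrency while ensuring an external cancellation is only delivered once every in-flight invocation has completed; the next patch uses exactly this call shape. A small sketch, with fetch_one and items as placeholders:

    from synapse.util.async_helpers import concurrently_execute

    async def fetch_all(items, fetch_one) -> None:
        # At most 10 invocations of fetch_one run concurrently; if the caller is
        # cancelled, cancellation is delayed until the whole gather has finished.
        await concurrently_execute(fetch_one, items, 10, delay_cancellation=True)
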
From 7d2261f9228be85ae55c874e49ed98b58e812336 Mon Sep 17 00:00:00 2001
From: "Olivier Wilkinson (reivilibre)" <oliverw@matrix.org>
Date: Tue, 20 Dec 2022 11:49:56 +0000
Subject: [PATCH 186/278] Limit query_devices_for_destination to 10 concurrent
 invocations

---
 synapse/handlers/e2e_keys.py | 39 ++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py
index 003f147ca6a..21b009ed47a 100644
--- a/synapse/handlers/e2e_keys.py
+++ b/synapse/handlers/e2e_keys.py
@@ -36,8 +36,8 @@
     get_domain_from_id,
     get_verify_key_from_cross_signing_key,
 )
-from synapse.util import json_decoder, unwrapFirstError
-from synapse.util.async_helpers import Linearizer, delay_cancellation
+from synapse.util import json_decoder
+from synapse.util.async_helpers import Linearizer, concurrently_execute
 from synapse.util.cancellation import cancellable
 from synapse.util.retryutils import NotRetryingDestination
 
@@ -241,24 +241,25 @@ async def query_devices(
             logger.debug(
                 "%d destinations to query devices for", len(remote_queries_not_in_cache)
             )
-            await make_deferred_yieldable(
-                delay_cancellation(
-                    defer.gatherResults(
-                        [
-                            run_in_background(
-                                self._query_devices_for_destination,
-                                results,
-                                cross_signing_keys,
-                                failures,
-                                destination,
-                                queries,
-                                timeout,
-                            )
-                            for destination, queries in remote_queries_not_in_cache.items()
-                        ],
-                        consumeErrors=True,
-                    ).addErrback(unwrapFirstError)
+
+            async def _query(
+                destination_queries: Tuple[str, Dict[str, Iterable[str]]]
+            ) -> None:
+                destination, queries = destination_queries
+                return await self._query_devices_for_destination(
+                    results,
+                    cross_signing_keys,
+                    failures,
+                    destination,
+                    queries,
+                    timeout,
                 )
+
+            await concurrently_execute(
+                _query,
+                remote_queries_not_in_cache.items(),
+                10,
+                delay_cancellation=True,
             )
 
             ret = {"device_keys": results, "failures": failures}

From 55f46d499bef2c3e266cd7581dd045cd07fd6fe3 Mon Sep 17 00:00:00 2001
From: "Olivier Wilkinson (reivilibre)" <oliverw@matrix.org>
Date: Tue, 20 Dec 2022 14:32:52 +0000
Subject: [PATCH 187/278] Add multi-user device resync in handler

---
 synapse/handlers/device.py | 58 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index d4750a32e64..f95643b8976 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -14,6 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
+from http import HTTPStatus
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -33,6 +34,7 @@
     Codes,
     FederationDeniedError,
     HttpResponseException,
+    InvalidAPICallError,
     RequestSendFailed,
     SynapseError,
 )
@@ -45,6 +47,7 @@
     JsonDict,
     StreamKeyType,
     StreamToken,
+    UserID,
     get_domain_from_id,
     get_verify_key_from_cross_signing_key,
 )
@@ -893,12 +896,41 @@ class DeviceListWorkerUpdater:
 
     def __init__(self, hs: "HomeServer"):
         from synapse.replication.http.devices import (
+            ReplicationMultiUserDevicesResyncRestServlet,
             ReplicationUserDevicesResyncRestServlet,
         )
 
         self._user_device_resync_client = (
             ReplicationUserDevicesResyncRestServlet.make_client(hs)
         )
+        self._multi_user_device_resync_client = (
+            ReplicationMultiUserDevicesResyncRestServlet.make_client(hs)
+        )
+
+    async def multi_user_device_resync(
+        self, user_ids: List[str], mark_failed_as_stale: bool = True
+    ) -> Dict[str, Optional[JsonDict]]:
+        """
+        Like `user_device_resync` but operates on multiple users **from the same origin**
+        at once.
+
+        Returns:
+            Dict from User ID to the same Dict as `user_device_resync`.
+        """
+        # TODO(BUG): mark_failed_as_stale is not sent.
+        try:
+            return await self._multi_user_device_resync_client(user_ids=user_ids)
+        except SynapseError as err:
+            if not (
+                err.code == HTTPStatus.NOT_FOUND and err.errcode == Codes.UNRECOGNIZED
+            ):
+                raise
+
+            # Fall back to single requests
+            result: Dict[str, Optional[JsonDict]] = {}
+            for user_id in user_ids:
+                result[user_id] = await self._user_device_resync_client(user_id=user_id)
+            return result
 
     async def user_device_resync(
         self, user_id: str, mark_failed_as_stale: bool = True
@@ -914,6 +946,7 @@ async def user_device_resync(
             request:
             https://matrix.org/docs/spec/server_server/r0.1.2#get-matrix-federation-v1-user-devices-userid
         """
+        # TODO(BUG): mark_failed_as_stale is not sent.
         return await self._user_device_resync_client(user_id=user_id)
 
 
@@ -1160,6 +1193,31 @@ async def _maybe_retry_device_resync(self) -> None:
             # Allow future calls to retry resyncing out of sync device lists.
             self._resync_retry_in_progress = False
 
+    async def multi_user_device_resync(
+        self, user_ids: List[str], mark_failed_as_stale: bool = True
+    ) -> Dict[str, Optional[JsonDict]]:
+        """
+        Like `user_device_resync` but operates on multiple users **from the same origin**
+        at once.
+
+        Returns:
+            Dict from User ID to the same Dict as `user_device_resync`.
+        """
+        if not user_ids:
+            return {}
+
+        origins = {UserID.from_string(user_id).domain for user_id in user_ids}
+
+        if len(origins) != 1:
+            raise InvalidAPICallError(f"Only one origin permitted, got {origins!r}")
+
+        result = {}
+        # TODO(Perf): Actually batch these up
+        for user_id in user_ids:
+            result[user_id] = await self.user_device_resync(user_id)
+
+        return result
+
     async def user_device_resync(
         self, user_id: str, mark_failed_as_stale: bool = True
     ) -> Optional[JsonDict]:

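On workers, the batched resync above degrades gracefully: if the main process does not recognise the new replication endpoint (a 404 with errcode M_UNRECOGNIZED, for example during a rolling upgrade), it falls back to one request per user. A generic sketch of that fallback pattern (the exception type here is a placeholder, not Synapse's SynapseError):

    from typing import Awaitable, Callable, Dict, List, Optional

    class UnrecognisedEndpoint(Exception):
        """Placeholder for 'the server does not know this endpoint'."""

    async def resync_many(
        user_ids: List[str],
        batched_call: Callable[..., Awaitable[Dict[str, Optional[dict]]]],
        single_call: Callable[..., Awaitable[Optional[dict]]],
    ) -> Dict[str, Optional[dict]]:
        try:
            return await batched_call(user_ids=user_ids)
        except UnrecognisedEndpoint:
            # Old main process: fall back to the pre-existing single-user endpoint.
            return {user_id: await single_call(user_id=user_id) for user_id in user_ids}
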
From b4f98428ca46514b245ca34503d083eecae24fa5 Mon Sep 17 00:00:00 2001
From: "Olivier Wilkinson (reivilibre)" <oliverw@matrix.org>
Date: Tue, 20 Dec 2022 14:05:57 +0000
Subject: [PATCH 188/278] Add a replication servlet for multi-user device
 resync

---
 synapse/replication/http/devices.py | 74 ++++++++++++++++++++++++++++-
 1 file changed, 73 insertions(+), 1 deletion(-)

diff --git a/synapse/replication/http/devices.py b/synapse/replication/http/devices.py
index 7c4941c3d3f..a41b4289ef5 100644
--- a/synapse/replication/http/devices.py
+++ b/synapse/replication/http/devices.py
@@ -13,12 +13,13 @@
 # limitations under the License.
 
 import logging
-from typing import TYPE_CHECKING, Optional, Tuple
+from typing import TYPE_CHECKING, Dict, List, Optional, Tuple
 
 from twisted.web.server import Request
 
 from synapse.http.server import HttpServer
 from synapse.http.servlet import parse_json_object_from_request
+from synapse.logging.opentracing import active_span
 from synapse.replication.http._base import ReplicationEndpoint
 from synapse.types import JsonDict
 
@@ -84,6 +85,76 @@ async def _handle_request(  # type: ignore[override]
         return 200, user_devices
 
 
+class ReplicationMultiUserDevicesResyncRestServlet(ReplicationEndpoint):
+    """Ask master to resync the device list for multiple users from the same
+    remote server by contacting their server.
+
+    This must happen on master so that the results can be correctly cached in
+    the database and streamed to workers.
+
+    Request format:
+
+        POST /_synapse/replication/multi_user_device_resync
+
+        {
+            "user_ids": ["@alice:example.org", "@bob:example.org", ...]
+        }
+
+    Response is roughly equivalent to ` /_matrix/federation/v1/user/devices/:user_id`
+    response, but there is a map from user ID to response, e.g.:
+
+        {
+            "@alice:example.org": {
+                "devices": [
+                    {
+                        "device_id": "JLAFKJWSCS",
+                        "keys": { ... },
+                        "device_display_name": "Alice's Mobile Phone"
+                    }
+                ]
+            },
+            ...
+        }
+    """
+
+    NAME = "multi_user_device_resync"
+    PATH_ARGS = ()
+    CACHE = False
+
+    def __init__(self, hs: "HomeServer"):
+        super().__init__(hs)
+
+        from synapse.handlers.device import DeviceHandler
+
+        handler = hs.get_device_handler()
+        assert isinstance(handler, DeviceHandler)
+        self.device_list_updater = handler.device_list_updater
+
+        self.store = hs.get_datastores().main
+        self.clock = hs.get_clock()
+
+    @staticmethod
+    async def _serialize_payload(user_ids: List[str]) -> JsonDict:  # type: ignore[override]
+        return {"user_ids": user_ids}
+
+    async def _handle_request(  # type: ignore[override]
+        self, request: Request
+    ) -> Tuple[int, Dict[str, Optional[JsonDict]]]:
+        content = parse_json_object_from_request(request)
+        user_ids: List[str] = content["user_ids"]
+
+        logger.info("Resync for %r", user_ids)
+        span = active_span()
+        if span:
+            span.set_tag("user_ids", f"{user_ids!r}")
+
+        multi_user_devices = await self.device_list_updater.multi_user_device_resync(
+            user_ids
+        )
+
+        return 200, multi_user_devices
+
+
 class ReplicationUploadKeysForUserRestServlet(ReplicationEndpoint):
     """Ask master to upload keys for the user and send them out over federation to
     update other servers.
@@ -151,4 +222,5 @@ async def _handle_request(  # type: ignore[override]
 
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     ReplicationUserDevicesResyncRestServlet(hs).register(http_server)
+    ReplicationMultiUserDevicesResyncRestServlet(hs).register(http_server)
     ReplicationUploadKeysForUserRestServlet(hs).register(http_server)

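For reference, the worker-side counterpart (wired up in the handler patch above) obtains a client for this endpoint via make_client and calls it with keyword arguments matching _serialize_payload. A brief usage sketch:

    from synapse.replication.http.devices import (
        ReplicationMultiUserDevicesResyncRestServlet,
    )

    def make_resync_client(hs):
        client = ReplicationMultiUserDevicesResyncRestServlet.make_client(hs)

        async def resync(user_ids):
            # e.g. await resync(["@alice:example.org", "@bob:example.org"])
            # returns a dict mapping each user ID to its device-list response
            return await client(user_ids=user_ids)

        return resync
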
From 369a97a714a49ee7248680092a20040da87d7e7c Mon Sep 17 00:00:00 2001
From: "Olivier Wilkinson (reivilibre)" <oliverw@matrix.org>
Date: Tue, 20 Dec 2022 14:36:13 +0000
Subject: [PATCH 189/278] Use assertions to ensure we don't have our
 expectations broken

---
 synapse/handlers/device.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index f95643b8976..f18cc32bb15 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -917,7 +917,8 @@ async def multi_user_device_resync(
         Returns:
             Dict from User ID to the same Dict as `user_device_resync`.
         """
-        # TODO(BUG): mark_failed_as_stale is not sent.
+        # mark_failed_as_stale is not sent. Ensure this doesn't break expectations.
+        assert mark_failed_as_stale
         try:
             return await self._multi_user_device_resync_client(user_ids=user_ids)
         except SynapseError as err:
@@ -946,7 +947,8 @@ async def user_device_resync(
             request:
             https://matrix.org/docs/spec/server_server/r0.1.2#get-matrix-federation-v1-user-devices-userid
         """
-        # TODO(BUG): mark_failed_as_stale is not sent.
+        # mark_failed_as_stale is not sent. Ensure this doesn't break expectations.
+        assert mark_failed_as_stale
         return await self._user_device_resync_client(user_id=user_id)
 
 

From 3c68aa4342d37ffefc821a2f3c6b1f8e0d91f62b Mon Sep 17 00:00:00 2001
From: "Olivier Wilkinson (reivilibre)" <oliverw@matrix.org>
Date: Tue, 20 Dec 2022 14:38:09 +0000
Subject: [PATCH 190/278] Use the multi-user path even for single users

This is futureproofing: we'll be able to rip out the single-user path later
---
 synapse/handlers/device.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index f18cc32bb15..ee866ce93a5 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -947,9 +947,7 @@ async def user_device_resync(
             request:
             https://matrix.org/docs/spec/server_server/r0.1.2#get-matrix-federation-v1-user-devices-userid
         """
-        # mark_failed_as_stale is not sent. Ensure this doesn't break expectations.
-        assert mark_failed_as_stale
-        return await self._user_device_resync_client(user_id=user_id)
+        return (await self.multi_user_device_resync([user_id]))[user_id]
 
 
 class DeviceListUpdater(DeviceListWorkerUpdater):

From 3d7451e04f1df9ee995cac5ff51bcaf0840b75c1 Mon Sep 17 00:00:00 2001
From: "Olivier Wilkinson (reivilibre)" <oliverw@matrix.org>
Date: Tue, 20 Dec 2022 17:35:24 +0000
Subject: [PATCH 191/278] Split out the marking of failed

---
 synapse/handlers/device.py | 46 +++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 25 deletions(-)

diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index ee866ce93a5..0a74e4d266b 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -1221,16 +1221,27 @@ async def multi_user_device_resync(
     async def user_device_resync(
         self, user_id: str, mark_failed_as_stale: bool = True
     ) -> Optional[JsonDict]:
+        result, failed = await self._user_device_resync_returning_failed(user_id)
+
+        if failed and mark_failed_as_stale:
+            # Mark the remote user's device list as stale so we know we need to retry
+            # it later.
+            await self.store.mark_remote_user_device_cache_as_stale(user_id)
+
+        return result
+
+    async def _user_device_resync_returning_failed(
+        self, user_id: str
+    ) -> Tuple[Optional[JsonDict], bool]:
         """Fetches all devices for a user and updates the device cache with them.
 
         Args:
             user_id: The user's id whose device_list will be updated.
-            mark_failed_as_stale: Whether to mark the user's device list as stale
-                if the attempt to resync failed.
         Returns:
-            A dict with device info as under the "devices" in the result of this
-            request:
-            https://matrix.org/docs/spec/server_server/r0.1.2#get-matrix-federation-v1-user-devices-userid
+            - A dict with device info as under the "devices" in the result of this
+              request:
+              https://matrix.org/docs/spec/server_server/r0.1.2#get-matrix-federation-v1-user-devices-userid
+            - True iff the resync failed and the device list should be marked as stale.
         """
         logger.debug("Attempting to resync the device list for %s", user_id)
         log_kv({"message": "Doing resync to update device list."})
@@ -1239,12 +1250,7 @@ async def user_device_resync(
         try:
             result = await self.federation.query_user_devices(origin, user_id)
         except NotRetryingDestination:
-            if mark_failed_as_stale:
-                # Mark the remote user's device list as stale so we know we need to retry
-                # it later.
-                await self.store.mark_remote_user_device_cache_as_stale(user_id)
-
-            return None
+            return None, True
         except (RequestSendFailed, HttpResponseException) as e:
             logger.warning(
                 "Failed to handle device list update for %s: %s",
@@ -1252,23 +1258,18 @@ async def user_device_resync(
                 e,
             )
 
-            if mark_failed_as_stale:
-                # Mark the remote user's device list as stale so we know we need to retry
-                # it later.
-                await self.store.mark_remote_user_device_cache_as_stale(user_id)
-
             # We abort on exceptions rather than accepting the update
             # as otherwise synapse will 'forget' that its device list
             # is out of date. If we bail then we will retry the resync
             # next time we get a device list update for this user_id.
             # This makes it more likely that the device lists will
             # eventually become consistent.
-            return None
+            return None, True
         except FederationDeniedError as e:
             set_tag("error", True)
             log_kv({"reason": "FederationDeniedError"})
             logger.info(e)
-            return None
+            return None, False
         except Exception as e:
             set_tag("error", True)
             log_kv(
@@ -1276,12 +1277,7 @@ async def user_device_resync(
             )
             logger.exception("Failed to handle device list update for %s", user_id)
 
-            if mark_failed_as_stale:
-                # Mark the remote user's device list as stale so we know we need to retry
-                # it later.
-                await self.store.mark_remote_user_device_cache_as_stale(user_id)
-
-            return None
+            return None, True
         log_kv({"result": result})
         stream_id = result["stream_id"]
         devices = result["devices"]
@@ -1363,7 +1359,7 @@ async def user_device_resync(
         # point.
         self._seen_updates[user_id] = {stream_id}
 
-        return result
+        return result, False
 
     async def process_cross_signing_key_update(
         self,

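The refactor above separates fetching from policy: _user_device_resync_returning_failed reports whether the attempt failed, and the callers decide whether to mark the cache stale. That separation is what lets the batched caller in the next patch collect failures and write them in one go. A compact sketch of the shape (illustrative names):

    from typing import Awaitable, Callable, Iterable, Optional, Tuple

    async def resync_returning_failed(
        user_id: str,
        fetch: Callable[[str], Awaitable[dict]],
    ) -> Tuple[Optional[dict], bool]:
        try:
            return await fetch(user_id), False
        except Exception:
            # (result, failed): no result, and the caller should retry later
            return None, True

    async def resync(
        user_id: str,
        fetch: Callable[[str], Awaitable[dict]],
        mark_stale: Callable[[Iterable[str]], Awaitable[None]],
    ) -> Optional[dict]:
        result, failed = await resync_returning_failed(user_id, fetch)
        if failed:
            await mark_stale((user_id,))
        return result
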
From 712144e7688333d24e2d6a650d6d9e6676de90fc Mon Sep 17 00:00:00 2001
From: "Olivier Wilkinson (reivilibre)" <oliverw@matrix.org>
Date: Tue, 20 Dec 2022 17:56:57 +0000
Subject: [PATCH 192/278] Batch up the DB writes when marking failures

---
 synapse/handlers/device.py                | 13 ++++++++--
 synapse/handlers/devicemessage.py         |  2 +-
 synapse/handlers/federation_event.py      |  2 +-
 synapse/storage/databases/main/devices.py | 30 +++++++++++++++++------
 synapse/types/__init__.py                 |  4 +++
 5 files changed, 39 insertions(+), 12 deletions(-)

diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 0a74e4d266b..68a0c8ccb49 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -1212,9 +1212,18 @@ async def multi_user_device_resync(
             raise InvalidAPICallError(f"Only one origin permitted, got {origins!r}")
 
         result = {}
+        failed = set()
         # TODO(Perf): Actually batch these up
         for user_id in user_ids:
-            result[user_id] = await self.user_device_resync(user_id)
+            user_result, user_failed = await self._user_device_resync_returning_failed(
+                user_id
+            )
+            result[user_id] = user_result
+            if user_failed:
+                failed.add(user_id)
+
+        if mark_failed_as_stale:
+            await self.store.mark_remote_users_device_caches_as_stale(failed)
 
         return result
 
@@ -1226,7 +1235,7 @@ async def user_device_resync(
         if failed and mark_failed_as_stale:
             # Mark the remote user's device list as stale so we know we need to retry
             # it later.
-            await self.store.mark_remote_user_device_cache_as_stale(user_id)
+            await self.store.mark_remote_users_device_caches_as_stale((user_id,))
 
         return result
 
diff --git a/synapse/handlers/devicemessage.py b/synapse/handlers/devicemessage.py
index 75e89850f5b..00c403db492 100644
--- a/synapse/handlers/devicemessage.py
+++ b/synapse/handlers/devicemessage.py
@@ -195,7 +195,7 @@ async def _check_for_unknown_devices(
                 sender_user_id,
                 unknown_devices,
             )
-            await self.store.mark_remote_user_device_cache_as_stale(sender_user_id)
+            await self.store.mark_remote_users_device_caches_as_stale((sender_user_id,))
 
             # Immediately attempt a resync in the background
             run_in_background(self._user_device_resync, user_id=sender_user_id)
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index 31df7f55cc9..6df000faafe 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -1423,7 +1423,7 @@ async def _resync_device(self, sender: str) -> None:
         """
 
         try:
-            await self._store.mark_remote_user_device_cache_as_stale(sender)
+            await self._store.mark_remote_users_device_caches_as_stale((sender,))
 
             # Immediately attempt a resync in the background
             if self._config.worker.worker_app:
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index a5bb4d404e2..a921332cb04 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -54,7 +54,7 @@
     AbstractStreamIdTracker,
     StreamIdGenerator,
 )
-from synapse.types import JsonDict, get_verify_key_from_cross_signing_key
+from synapse.types import JsonDict, StrCollection, get_verify_key_from_cross_signing_key
 from synapse.util import json_decoder, json_encoder
 from synapse.util.caches.descriptors import cached, cachedList
 from synapse.util.caches.lrucache import LruCache
@@ -1062,16 +1062,30 @@ async def get_user_ids_requiring_device_list_resync(
 
         return {row["user_id"] for row in rows}
 
-    async def mark_remote_user_device_cache_as_stale(self, user_id: str) -> None:
+    async def mark_remote_users_device_caches_as_stale(
+        self, user_ids: StrCollection
+    ) -> None:
         """Records that the server has reason to believe the cache of the devices
         for the remote users is out of date.
         """
-        await self.db_pool.simple_upsert(
-            table="device_lists_remote_resync",
-            keyvalues={"user_id": user_id},
-            values={},
-            insertion_values={"added_ts": self._clock.time_msec()},
-            desc="mark_remote_user_device_cache_as_stale",
+
+        def _mark_remote_users_device_caches_as_stale_txn(
+            txn: LoggingTransaction,
+        ) -> None:
+            # TODO add insertion_values support to simple_upsert_many and use
+            #      that!
+            for user_id in user_ids:
+                self.db_pool.simple_upsert_txn(
+                    txn,
+                    table="device_lists_remote_resync",
+                    keyvalues={"user_id": user_id},
+                    values={},
+                    insertion_values={"added_ts": self._clock.time_msec()},
+                )
+
+        await self.db_pool.runInteraction(
+            "mark_remote_users_device_caches_as_stale",
+            _mark_remote_users_device_caches_as_stale_txn,
         )
 
     async def mark_remote_user_device_cache_as_valid(self, user_id: str) -> None:
diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py
index f2d436ddc38..0c725eb9677 100644
--- a/synapse/types/__init__.py
+++ b/synapse/types/__init__.py
@@ -77,6 +77,10 @@
 # A JSON-serialisable object.
 JsonSerializable = object
 
+# Collection[str] that does not include str itself; str being a Sequence[str]
+# is very misleading and results in bugs.
+StrCollection = Union[Tuple[str, ...], List[str], Set[str]]
+
 
 # Note that this seems to require inheriting *directly* from Interface in order
 # for mypy-zope to realize it is an interface.

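The StrCollection alias added above exists because a bare str itself satisfies Collection[str] and Sequence[str], so passing a single user ID where a collection is expected silently iterates its characters. A tiny demonstration of the failure mode the alias guards against:

    from typing import Collection, List

    def to_list(user_ids: Collection[str]) -> List[str]:
        return list(user_ids)

    print(to_list("@alice:example.org"))     # ['@', 'a', 'l', ...]  (the bug)
    print(to_list(("@alice:example.org",)))  # ['@alice:example.org'] (intended)
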
From 46807789d0c7a264679164e71ab94c1ef1f7e839 Mon Sep 17 00:00:00 2001
From: "Olivier Wilkinson (reivilibre)" <oliverw@matrix.org>
Date: Tue, 20 Dec 2022 18:20:33 +0000
Subject: [PATCH 193/278] Newsfile

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>
---
 changelog.d/14716.misc | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/14716.misc

diff --git a/changelog.d/14716.misc b/changelog.d/14716.misc
new file mode 100644
index 00000000000..ef9522e01db
--- /dev/null
+++ b/changelog.d/14716.misc
@@ -0,0 +1 @@
+Batch up replication requests to request the resyncing of remote users' devices.
\ No newline at end of file

From cbf1cebef304190fda5bf90f1268e6238eb26888 Mon Sep 17 00:00:00 2001
From: "Olivier Wilkinson (reivilibre)" <oliverw@matrix.org>
Date: Tue, 20 Dec 2022 18:30:47 +0000
Subject: [PATCH 194/278] Mitigate jump to date slowness by adding 30s timeout

---
 synapse/storage/databases/main/events_worker.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index 761b15a8150..f80b494edb2 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -2276,6 +2276,10 @@ async def get_event_id_for_timestamp(
         """
 
         def get_event_id_for_timestamp_txn(txn: LoggingTransaction) -> Optional[str]:
+            if isinstance(self.database_engine, PostgresEngine):
+                # Temporary: make sure these queries can't last more than 30s
+                txn.execute("SET LOCAL statement_timeout = 30000")
+
             txn.execute(
                 sql_template,
                 (room_id, timestamp),

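SET LOCAL scopes statement_timeout to the current transaction, so the 30-second cap applies only to the jump-to-date query and cannot leak to later queries run on the same pooled connection. A rough psycopg2-flavoured sketch of the same pattern (the connection handling and query text are illustrative, not Synapse's actual SQL):

    def get_event_id_for_timestamp(conn, room_id: str, timestamp: int):
        with conn:  # one transaction; committed or rolled back on exit
            with conn.cursor() as cur:
                # Only affects this transaction; value is in milliseconds.
                cur.execute("SET LOCAL statement_timeout = 30000")
                cur.execute(
                    "SELECT event_id FROM events"
                    " WHERE room_id = %s AND origin_server_ts >= %s"
                    " ORDER BY origin_server_ts ASC LIMIT 1",
                    (room_id, timestamp),
                )
                row = cur.fetchone()
                return row[0] if row else None
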
From 3ac08cc0be325a8beaf8cc66780248d8dd4f70f0 Mon Sep 17 00:00:00 2001
From: "Olivier Wilkinson (reivilibre)" <oliverw@matrix.org>
Date: Wed, 21 Dec 2022 11:03:26 +0000
Subject: [PATCH 195/278] Revert "Bump hiredis from 2.0.0 to 2.1.0 (#14699)"

This reverts commit 9c89707b56908e755de546c7d390637cecb53159.
---
 changelog.d/14699.misc |   1 -
 poetry.lock            | 133 +++++++++++++----------------------------
 2 files changed, 43 insertions(+), 91 deletions(-)
 delete mode 100644 changelog.d/14699.misc

diff --git a/changelog.d/14699.misc b/changelog.d/14699.misc
deleted file mode 100644
index d73ef25634b..00000000000
--- a/changelog.d/14699.misc
+++ /dev/null
@@ -1 +0,0 @@
-Bump hiredis from 2.0.0 to 2.1.0.
diff --git a/poetry.lock b/poetry.lock
index 3fbad339a32..9a9a141a14b 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -318,11 +318,11 @@ typing-extensions = {version = ">=3.7.4.3", markers = "python_version < \"3.8\""
 
 [[package]]
 name = "hiredis"
-version = "2.1.0"
+version = "2.0.0"
 description = "Python wrapper for hiredis"
 category = "main"
 optional = true
-python-versions = ">=3.7"
+python-versions = ">=3.6"
 
 [[package]]
 name = "hyperlink"
@@ -1867,94 +1867,47 @@ gitpython = [
     {file = "GitPython-3.1.29.tar.gz", hash = "sha256:cc36bfc4a3f913e66805a28e84703e419d9c264c1077e537b54f0e1af85dbefd"},
 ]
 hiredis = [
-    {file = "hiredis-2.1.0-cp310-cp310-macosx_10_12_universal2.whl", hash = "sha256:7b339a7542a3f6a10b3bbc157e4abc9bae9628e2df7faf5f8a32f730014719ae"},
-    {file = "hiredis-2.1.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:dd82370c2f9f804ec617b95d25edb0fd04882251afb2ecdf08b9ced0c3aa4bcc"},
-    {file = "hiredis-2.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:92077511d3a62109d5d11bf584e41264a993ae3c77c72de63c1f741b7809bacb"},
-    {file = "hiredis-2.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a6544c7807cbb75bc6ae9ab85773b4413edbcd55342e9e3d7d3f159f677f7428"},
-    {file = "hiredis-2.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8181d73f25943fbdca904154e51b845317103cee08116cfae258f96927ce1e74"},
-    {file = "hiredis-2.1.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:040f861e4e43daa9287f3a85979542f9c7ee8cfab695fa662f3b6186c6f7d5e8"},
-    {file = "hiredis-2.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef5ae8c1af82a8000742003cb16a6fa6c57919abb861ab214dcb27db8573ee64"},
-    {file = "hiredis-2.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9b9aa1b0ec46dec5b05dcec22e50bbd4af33da121fca83bd2601dc60c79183f9"},
-    {file = "hiredis-2.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:c53c36a630a6c6fd9dfe439f4266e564ca58995015a780c1d964567ebf328466"},
-    {file = "hiredis-2.1.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:05aab35210bd7fbd7bd066efb2a42eb5c2878c2c137a9cff597204be2c07475b"},
-    {file = "hiredis-2.1.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e6097e1cef647c665f71cd0e58346389580db98365e804f7a9ad5d96e66b7150"},
-    {file = "hiredis-2.1.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:32f98370efed38088d000df2eb2c8ed43d93d99bbf4a0a740e15eb4a887cc23f"},
-    {file = "hiredis-2.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b85276ed57e0aee8910b48383a38a299851935ba134460bad394988c750985fe"},
-    {file = "hiredis-2.1.0-cp310-cp310-win32.whl", hash = "sha256:bd9d99606008a8cfa6b9e950abaa35f5b87496f03e63b73197d02b0fe7ecb6d3"},
-    {file = "hiredis-2.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:6a8e796c94b7b8c63c99757d6ec2075069e4c362dfb0f130aaf874422bea3e7d"},
-    {file = "hiredis-2.1.0-cp311-cp311-macosx_10_12_universal2.whl", hash = "sha256:e7bb5cab604fc45b45cee40e84e84d9e30eeb34c571a3784392ae658273bbd23"},
-    {file = "hiredis-2.1.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:e0d4b074ff5ebba00933da27a06f3752b8af2448a6aa9dc895d5279f43011530"},
-    {file = "hiredis-2.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f0c2dbaffd4a9e8df04731a012c8a67b7517abec7e53bb12c3cd749865c63428"},
-    {file = "hiredis-2.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c19151e79b36e0d849899a21fc10539aa1903af94b31754bddab1bea876cd508"},
-    {file = "hiredis-2.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:08ec41519a533f5cd1f1f8bd1797929358117c8e4570b679b469f768b45b7dbf"},
-    {file = "hiredis-2.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98f0db3667fa8abbd37ac66385b460841029033bfc1ba8d7e5b3ff1e01d3346a"},
-    {file = "hiredis-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f592d1522b5981890b34b0b814f4bfa4a68b23ee90f538aac321d17e8bf859c8"},
-    {file = "hiredis-2.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dddd2be67de25a62b3bf871f091181c13da3b32186d4be6af49dadbf6fdc266d"},
-    {file = "hiredis-2.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4ee8f6d0774cd6179c625688201e961a2d03da212230adaa2193cfb7a04f9169"},
-    {file = "hiredis-2.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:5000942ffb6b6410ccbc87089c15fde5f48bd205664ee8b3067e6b2fb5689485"},
-    {file = "hiredis-2.1.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:21e0017b8f50abd13b4c4c4218c7dfd5a42623e3255b460dfa5f70b45c4e7c3e"},
-    {file = "hiredis-2.1.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:40b55fb46fcc78b04190176c0ae28bfa3cc7f418fca9df06c037028af5942b6a"},
-    {file = "hiredis-2.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:24a55169a7f0bd9458935ac644bf8191f127c8aa50cdd70c0b87928cc515cae5"},
-    {file = "hiredis-2.1.0-cp311-cp311-win32.whl", hash = "sha256:bb60f79e8c1eb5971b10fd256764ea0c89c4ad2d55ac4379981f678f349411f2"},
-    {file = "hiredis-2.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:b223668844f26034759a6c24a72f0bb8e4fb64a43b27e2f3e8378639eaac1661"},
-    {file = "hiredis-2.1.0-cp37-cp37m-macosx_10_12_x86_64.whl", hash = "sha256:7f7e7d91d6533fcb1939d467cf8bfb98640edf715897959f31ae83f5ad29aed3"},
-    {file = "hiredis-2.1.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:531d1d3955244831b69272b993e16f93489ce2dadfdf800ac856dc2d9a43d353"},
-    {file = "hiredis-2.1.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:66ffcbfc4db52dd87cdfd53bda45881ab3ab07c80ec43244fd8d70ee69d42c01"},
-    {file = "hiredis-2.1.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:023b3b3ac410d6cfdb45ee943b8c528c90379f31419a1fd229888aa2b965732d"},
-    {file = "hiredis-2.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c972385a0647120d4b0fe0e9567257cad7b2577b9f1315815713c571af0e778d"},
-    {file = "hiredis-2.1.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:32893825426e73d57b3290b68110dd76229945e6c79b08a37795f536501935c4"},
-    {file = "hiredis-2.1.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:262148f9b616c0cdd0f2c6bda45cd0f1ce6ce2d1974efd296b85b44e5c7567c2"},
-    {file = "hiredis-2.1.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:9d601c27b9599fe52cade3096351f92f665e527d29af8d3e29353a76bfcf5615"},
-    {file = "hiredis-2.1.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:d248acc7d7713c1b3d48ed8ea67d6ba43b104aa67d63078846a3590adbab6b73"},
-    {file = "hiredis-2.1.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:969ffe37a8980a6e5404993ccfe605a40fa6732fa6d7b26a1a718c9121197002"},
-    {file = "hiredis-2.1.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:288d5d0566d3cbcd800e46c7a547428d321842898b8c7de037a7e78b5644e88a"},
-    {file = "hiredis-2.1.0-cp37-cp37m-win32.whl", hash = "sha256:06cb776d3cd3cbec86010f1bab6895ee16af8036aae8c3594a5e96c24f0f83a5"},
-    {file = "hiredis-2.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:6766376dc43ef186113422ecacec0ece0d4b12c0e5f4b556669e639b20ccabb1"},
-    {file = "hiredis-2.1.0-cp38-cp38-macosx_10_12_universal2.whl", hash = "sha256:41afba30304adcbe1c93fc8272a7169b7fc4e4d3d470ad8babd391678a519d76"},
-    {file = "hiredis-2.1.0-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:6df0115f8b0766cd3d12416e2e2e914efed5b1a1a27605c9f37bc92de086877a"},
-    {file = "hiredis-2.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5d7d7078f3b841ad86e35459e9f1a49db6d793b796a25fe866333166196d9fec"},
-    {file = "hiredis-2.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:835c4cbf8b38c83240b3eb9bd575cd1bfefe5ea5c46cc5ac2bf2d1f47d1fd696"},
-    {file = "hiredis-2.1.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:718589c48e97820bdc2a99e2621b5039884cc23199213756054d10cd309ad56c"},
-    {file = "hiredis-2.1.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2d96be6917ea8f753691a4674f682dd5e145b70edab28c05aa5552ae873e843"},
-    {file = "hiredis-2.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b5fe1bb4b1525751f3050337097b3b2bfe445836e59a5a0984928dd0797f9abf"},
-    {file = "hiredis-2.1.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:91dc73310b92b4aeccffdcd4a762955fe71380f5eaa4e242ee95019e41519101"},
-    {file = "hiredis-2.1.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:bb858218de60a930a164a991fff001c70b0c3d923d3ae40fef2acf3321126b00"},
-    {file = "hiredis-2.1.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:53040c3b3488b52f4609775453fc759262f2885b733150ee2e1d88257fdafed8"},
-    {file = "hiredis-2.1.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:a1c9b7d6d7bf35e1e2217b2847710154b11d25bf86b77bb7e190161f8b89917e"},
-    {file = "hiredis-2.1.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:dfbe939fdddbc7b90cab4124f3ddd6391099fb964f6dab3386aa8cf56f37b5ba"},
-    {file = "hiredis-2.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:3a51cb4ea466276a845a940931357b4a876f903eabde514ba95e45050e1c2150"},
-    {file = "hiredis-2.1.0-cp38-cp38-win32.whl", hash = "sha256:8bce4c687136bf13df76072072b9baadbd52f7d1b143fbbda96387f50e8ebaeb"},
-    {file = "hiredis-2.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:1f94684b13fbbee1239303018d5ea900d786e486cdb130cde3144d53f4e262e4"},
-    {file = "hiredis-2.1.0-cp39-cp39-macosx_10_12_universal2.whl", hash = "sha256:879668ffab582bdffd9f10f6c8797aac055db183f266e3aa3a6438ff0768bc29"},
-    {file = "hiredis-2.1.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:f1d5a99de0fd02438f251e50ec64936d22d542c8e5d80bdec236f9713eeef334"},
-    {file = "hiredis-2.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ab622bcddcf334b4b1fc4b22e163e93160e3afdd7feaedd77ac6f258e0c77b68"},
-    {file = "hiredis-2.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:964c4f23ff450fb8d73edf06fc7475a4e81a3f9b03a9a04a907ec81c84052fcf"},
-    {file = "hiredis-2.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d9f8b8daef346ffc0268d7086c213ab24c2a3fcbd4249eacfbb3635602c79d20"},
-    {file = "hiredis-2.1.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1e2039cdaa2e6656eae4a2e2537ed77e27f29b7487b97ce7ae6a3cb88d01b968"},
-    {file = "hiredis-2.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:43d3168da0a81fa0a9e4bc6e14316beac8e5f1b439ca5cc5af7f9a558cfba741"},
-    {file = "hiredis-2.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0021ba034b74c5006f62e4cfdd79d04c7c720731eda256ce29d769ac6483adc3"},
-    {file = "hiredis-2.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:39a1bb45bcd698baf70ad4e9a94af164525bf053caea7df3777172d20d69538a"},
-    {file = "hiredis-2.1.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:c1b636b05777536a83b4cced157cbdc2d0012d494a9ec2f7b7e07c54296cd773"},
-    {file = "hiredis-2.1.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:58a7ceb71f967fcc1878fb64666a12fbc5f243ab00d0653d3752a811941d8261"},
-    {file = "hiredis-2.1.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:c5263c676dc4d55202e7ca0429b949fc6ba7c0dd3a3a2b80538593ab27d82836"},
-    {file = "hiredis-2.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b5879d13025b04903ddf71921812db27fe1156a0952ad253014354d72463aaa9"},
-    {file = "hiredis-2.1.0-cp39-cp39-win32.whl", hash = "sha256:9259f637d77544ffeb97acb0a87fdd192a8aced7a2fbd7439160dbee8341d446"},
-    {file = "hiredis-2.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:fb818b6e0981e16dfdfc9e507c9842f8d210e6ecaf3edb8ac3039dbd24768839"},
-    {file = "hiredis-2.1.0-pp37-pypy37_pp73-macosx_10_12_x86_64.whl", hash = "sha256:648d4648bf6b3dcc418a974df143b2f96627ab8b50bda23a57759c273880ecfb"},
-    {file = "hiredis-2.1.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:654949cfc0bc76a5292b6ac111113b2eafb0739e0496495368981ea2e80bf4ec"},
-    {file = "hiredis-2.1.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f2a98b835c2088998a47da51b1b3661b587b2d4b3305d03fc9893888cc2aa54"},
-    {file = "hiredis-2.1.0-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7222bd9243387d778245619d0ac62d35cf72ee746ec0efb7b9b230ae3e0c3a39"},
-    {file = "hiredis-2.1.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:778f6de73c3abd67d447a3442f89e7d43a8de1eb5093f416af14dddc1d5c9cb5"},
-    {file = "hiredis-2.1.0-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c4cfb61fe642f30a22789055847004393bc65b5686988c64191e379ea4ccd069"},
-    {file = "hiredis-2.1.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03b6bef7eb50415aca87200a511d66a2fd69f1fcc75cfe1408e1201cbe28ddfb"},
-    {file = "hiredis-2.1.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3195e13a700f6ff35894c4920fcce8f6c2b01cdbc01f76fe567753c495849e9b"},
-    {file = "hiredis-2.1.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:19f724405c808a89db422ed1010caab80a16d3e5b49632356ae7912513b6d58e"},
-    {file = "hiredis-2.1.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:8ecebeff966b412138b0cd105d7572f8d5e65e96355af699863890f8370707e6"},
-    {file = "hiredis-2.1.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:4f34eefaf164bf43b29ccc809c168248eb95001837ed0e9e3279891f57ae2fab"},
-    {file = "hiredis-2.1.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:11fad16beb9d623ea423c9129bab0e392ea4c84363d61c125f679be3d029442f"},
-    {file = "hiredis-2.1.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c763eb9a1414c4d665945c70ae2ef74a843600667b0069fe90e2aabc78e5411"},
-    {file = "hiredis-2.1.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:edb7f156a8f8a1999574f27bda67dd2bff2d5b180bb6aed996a1792cafbcc668"},
-    {file = "hiredis-2.1.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:e057d5545189d4c9e22ae0f7dc283ea0a225f56999511022c062cce7f9589d69"},
+    {file = "hiredis-2.0.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:b4c8b0bc5841e578d5fb32a16e0c305359b987b850a06964bd5a62739d688048"},
+    {file = "hiredis-2.0.0-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:0adea425b764a08270820531ec2218d0508f8ae15a448568109ffcae050fee26"},
+    {file = "hiredis-2.0.0-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:3d55e36715ff06cdc0ab62f9591607c4324297b6b6ce5b58cb9928b3defe30ea"},
+    {file = "hiredis-2.0.0-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:5d2a48c80cf5a338d58aae3c16872f4d452345e18350143b3bf7216d33ba7b99"},
+    {file = "hiredis-2.0.0-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:240ce6dc19835971f38caf94b5738092cb1e641f8150a9ef9251b7825506cb05"},
+    {file = "hiredis-2.0.0-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:5dc7a94bb11096bc4bffd41a3c4f2b958257085c01522aa81140c68b8bf1630a"},
+    {file = "hiredis-2.0.0-cp36-cp36m-win32.whl", hash = "sha256:139705ce59d94eef2ceae9fd2ad58710b02aee91e7fa0ccb485665ca0ecbec63"},
+    {file = "hiredis-2.0.0-cp36-cp36m-win_amd64.whl", hash = "sha256:c39c46d9e44447181cd502a35aad2bb178dbf1b1f86cf4db639d7b9614f837c6"},
+    {file = "hiredis-2.0.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:adf4dd19d8875ac147bf926c727215a0faf21490b22c053db464e0bf0deb0485"},
+    {file = "hiredis-2.0.0-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:0f41827028901814c709e744060843c77e78a3aca1e0d6875d2562372fcb405a"},
+    {file = "hiredis-2.0.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:508999bec4422e646b05c95c598b64bdbef1edf0d2b715450a078ba21b385bcc"},
+    {file = "hiredis-2.0.0-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:0d5109337e1db373a892fdcf78eb145ffb6bbd66bb51989ec36117b9f7f9b579"},
+    {file = "hiredis-2.0.0-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:04026461eae67fdefa1949b7332e488224eac9e8f2b5c58c98b54d29af22093e"},
+    {file = "hiredis-2.0.0-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:a00514362df15af041cc06e97aebabf2895e0a7c42c83c21894be12b84402d79"},
+    {file = "hiredis-2.0.0-cp37-cp37m-win32.whl", hash = "sha256:09004096e953d7ebd508cded79f6b21e05dff5d7361771f59269425108e703bc"},
+    {file = "hiredis-2.0.0-cp37-cp37m-win_amd64.whl", hash = "sha256:f8196f739092a78e4f6b1b2172679ed3343c39c61a3e9d722ce6fcf1dac2824a"},
+    {file = "hiredis-2.0.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:294a6697dfa41a8cba4c365dd3715abc54d29a86a40ec6405d677ca853307cfb"},
+    {file = "hiredis-2.0.0-cp38-cp38-manylinux1_i686.whl", hash = "sha256:3dddf681284fe16d047d3ad37415b2e9ccdc6c8986c8062dbe51ab9a358b50a5"},
+    {file = "hiredis-2.0.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:dcef843f8de4e2ff5e35e96ec2a4abbdf403bd0f732ead127bd27e51f38ac298"},
+    {file = "hiredis-2.0.0-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:87c7c10d186f1743a8fd6a971ab6525d60abd5d5d200f31e073cd5e94d7e7a9d"},
+    {file = "hiredis-2.0.0-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:7f0055f1809b911ab347a25d786deff5e10e9cf083c3c3fd2dd04e8612e8d9db"},
+    {file = "hiredis-2.0.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:11d119507bb54e81f375e638225a2c057dda748f2b1deef05c2b1a5d42686048"},
+    {file = "hiredis-2.0.0-cp38-cp38-win32.whl", hash = "sha256:7492af15f71f75ee93d2a618ca53fea8be85e7b625e323315169977fae752426"},
+    {file = "hiredis-2.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:65d653df249a2f95673976e4e9dd7ce10de61cfc6e64fa7eeaa6891a9559c581"},
+    {file = "hiredis-2.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ae8427a5e9062ba66fc2c62fb19a72276cf12c780e8db2b0956ea909c48acff5"},
+    {file = "hiredis-2.0.0-cp39-cp39-manylinux1_i686.whl", hash = "sha256:3f5f7e3a4ab824e3de1e1700f05ad76ee465f5f11f5db61c4b297ec29e692b2e"},
+    {file = "hiredis-2.0.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:e3447d9e074abf0e3cd85aef8131e01ab93f9f0e86654db7ac8a3f73c63706ce"},
+    {file = "hiredis-2.0.0-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:8b42c0dc927b8d7c0eb59f97e6e34408e53bc489f9f90e66e568f329bff3e443"},
+    {file = "hiredis-2.0.0-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:b84f29971f0ad4adaee391c6364e6f780d5aae7e9226d41964b26b49376071d0"},
+    {file = "hiredis-2.0.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:0b39ec237459922c6544d071cdcf92cbb5bc6685a30e7c6d985d8a3e3a75326e"},
+    {file = "hiredis-2.0.0-cp39-cp39-win32.whl", hash = "sha256:a7928283143a401e72a4fad43ecc85b35c27ae699cf5d54d39e1e72d97460e1d"},
+    {file = "hiredis-2.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:a4ee8000454ad4486fb9f28b0cab7fa1cd796fc36d639882d0b34109b5b3aec9"},
+    {file = "hiredis-2.0.0-pp36-pypy36_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1f03d4dadd595f7a69a75709bc81902673fa31964c75f93af74feac2f134cc54"},
+    {file = "hiredis-2.0.0-pp36-pypy36_pp73-manylinux1_x86_64.whl", hash = "sha256:04927a4c651a0e9ec11c68e4427d917e44ff101f761cd3b5bc76f86aaa431d27"},
+    {file = "hiredis-2.0.0-pp36-pypy36_pp73-manylinux2010_x86_64.whl", hash = "sha256:a39efc3ade8c1fb27c097fd112baf09d7fd70b8cb10ef1de4da6efbe066d381d"},
+    {file = "hiredis-2.0.0-pp36-pypy36_pp73-win32.whl", hash = "sha256:07bbf9bdcb82239f319b1f09e8ef4bdfaec50ed7d7ea51a56438f39193271163"},
+    {file = "hiredis-2.0.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:807b3096205c7cec861c8803a6738e33ed86c9aae76cac0e19454245a6bbbc0a"},
+    {file = "hiredis-2.0.0-pp37-pypy37_pp73-manylinux1_x86_64.whl", hash = "sha256:1233e303645f468e399ec906b6b48ab7cd8391aae2d08daadbb5cad6ace4bd87"},
+    {file = "hiredis-2.0.0-pp37-pypy37_pp73-manylinux2010_x86_64.whl", hash = "sha256:cb2126603091902767d96bcb74093bd8b14982f41809f85c9b96e519c7e1dc41"},
+    {file = "hiredis-2.0.0-pp37-pypy37_pp73-win32.whl", hash = "sha256:f52010e0a44e3d8530437e7da38d11fb822acfb0d5b12e9cd5ba655509937ca0"},
+    {file = "hiredis-2.0.0.tar.gz", hash = "sha256:81d6d8e39695f2c37954d1011c0480ef7cf444d4e3ae24bc5e89ee5de360139a"},
 ]
 hyperlink = [
     {file = "hyperlink-21.0.0-py2.py3-none-any.whl", hash = "sha256:e6b14c37ecb73e89c77d78cdb4c2cc8f3fb59a885c5b3f819ff4ed80f25af1b4"},

From 50a56edcc2500b784c2e2b688966dfa06271dcaa Mon Sep 17 00:00:00 2001
From: "Olivier Wilkinson (reivilibre)" <oliverw@matrix.org>
Date: Wed, 21 Dec 2022 11:04:34 +0000
Subject: [PATCH 196/278] Newsfile

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>
---
 changelog.d/14718.misc | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/14718.misc

diff --git a/changelog.d/14718.misc b/changelog.d/14718.misc
new file mode 100644
index 00000000000..cda3ededd16
--- /dev/null
+++ b/changelog.d/14718.misc
@@ -0,0 +1 @@
+Revert update of hiredis in Poetry lockfile: revert from 2.1.0 to 2.0.0.
\ No newline at end of file

From 55a4d7e1cee2b3a23818f39125ed7ac29d4f1b1a Mon Sep 17 00:00:00 2001
From: "Olivier Wilkinson (reivilibre)" <oliverw@matrix.org>
Date: Wed, 21 Dec 2022 11:42:31 +0000
Subject: [PATCH 197/278] Typo fix

---
 synapse/replication/http/devices.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/replication/http/devices.py b/synapse/replication/http/devices.py
index a41b4289ef5..ea5c08e6cfd 100644
--- a/synapse/replication/http/devices.py
+++ b/synapse/replication/http/devices.py
@@ -135,7 +135,7 @@ def __init__(self, hs: "HomeServer"):
 
     @staticmethod
     async def _serialize_payload(user_ids: List[str]) -> JsonDict:  # type: ignore[override]
-        return {"users": user_ids}
+        return {"user_ids": user_ids}
 
     async def _handle_request(  # type: ignore[override]
         self, request: Request
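
For context on why this rename mattered: the payload serialised by the replication
client must use the same key that the receiving handler reads back, otherwise the
lookup on the worker side fails. A tiny standalone sketch (made-up payload, not the
real replication transport):

    payload = {"user_ids": ["@alice:example.org", "@bob:example.org"]}
    # The handler reads the list back under the same key, so a mismatched
    # "users" key on the sending side would surface as a KeyError here.
    user_ids = payload["user_ids"]
    assert user_ids == ["@alice:example.org", "@bob:example.org"]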

From 62e4ad79914d02fff7e3ebe78a5ba1ebd017b8f5 Mon Sep 17 00:00:00 2001
From: "Olivier Wilkinson (reivilibre)" <oliverw@matrix.org>
Date: Wed, 21 Dec 2022 13:12:53 +0000
Subject: [PATCH 198/278] Use the multi-user device resync to batch requests

---
 synapse/handlers/e2e_keys.py | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py
index 21b009ed47a..d2188ca08f8 100644
--- a/synapse/handlers/e2e_keys.py
+++ b/synapse/handlers/e2e_keys.py
@@ -320,18 +320,25 @@ async def _query_devices_for_destination(
             destination,
         )
 
-        for user_id in users_to_resync_devices:
-            # We've decided we're sharing a room with this user and should
-            # probably be tracking their device lists. However, we haven't
-            # done an initial sync on the device list so we do it now.
-            try:
-                resync_results = (
-                    await self.device_handler.device_list_updater.user_device_resync(
-                        user_id
-                    )
+        try:
+            user_resync_results = (
+                await self.device_handler.device_list_updater.multi_user_device_resync(
+                    list(users_to_resync_devices)
                 )
+            )
+            for user_id in users_to_resync_devices:
+                resync_results = user_resync_results[user_id]
+
                 if resync_results is None:
-                    raise ValueError("Device resync failed")
+                    # TODO: It's weird that we'll store a failure against a
+                    #       destination, yet continue processing users from that
+                    #       destination.
+                    #       We might want to consider changing this, but for now
+                    #       I'm leaving it as I found it.
+                    failures[destination] = _exception_to_failure(
+                        ValueError(f"Device resync failed for {user_id!r}")
+                    )
+                    continue
 
                 # Add the device keys to the results.
                 user_devices = resync_results["devices"]
@@ -349,8 +356,8 @@ async def _query_devices_for_destination(
 
                 if self_signing_key:
                     cross_signing_keys["self_signing_keys"][user_id] = self_signing_key
-            except Exception as e:
-                failures[destination] = _exception_to_failure(e)
+        except Exception as e:
+            failures[destination] = _exception_to_failure(e)
 
         if len(destination_query) == len(user_ids_updated):
             # We've updated all the users in the query and we do not need to
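
Stated outside the diff, the shape of the change above is: a single batched resync
replaces a loop of per-user resyncs, and a missing result for one user is recorded as
a failure without aborting the remaining users. A minimal sketch of that pattern under
those assumptions (none of these function names are Synapse's):

    import asyncio
    from typing import Dict, Iterable, Optional

    async def resync_many(user_ids: Iterable[str]) -> Dict[str, Optional[dict]]:
        # Stand-in for a batched remote resync: one round trip covering every user.
        return {uid: {"devices": []} for uid in user_ids}

    async def gather_device_keys(user_ids: list) -> Dict[str, dict]:
        results = await resync_many(user_ids)
        keys: Dict[str, dict] = {}
        failed: list = []
        for uid in user_ids:
            resync_result = results.get(uid)
            if resync_result is None:
                # Record the failure for this user but keep processing the rest.
                failed.append(uid)
                continue
            keys[uid] = resync_result
        return keys

    assert asyncio.run(gather_device_keys(["@a:x", "@b:y"])) == {
        "@a:x": {"devices": []},
        "@b:y": {"devices": []},
    }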

From 16a447303455030150c13925844ec084c8d9b00d Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Wed, 25 Jan 2023 15:14:26 +0000
Subject: [PATCH 199/278] Bump the client-side timeout for /state

to allow faster-joins resyncs the chance to complete for large rooms.
We have seen this fare poorly (~90s for Matrix HQ's /state) in testing,
causing the resync to advance to another HS which hasn't seen our join yet.
---
 synapse/federation/transport/client.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py
index 556883f0798..730169f7229 100644
--- a/synapse/federation/transport/client.py
+++ b/synapse/federation/transport/client.py
@@ -102,6 +102,10 @@ async def get_room_state(
             destination,
             path=path,
             args={"event_id": event_id},
+            # This can take a looooooong time for large rooms. Give this a generous
+            # timeout, to avoid the partial state resync timing out early and trying
+            # a bunch of servers who haven't see our join yet.
+            timeout=600,
             parser=_StateParser(room_version),
         )
 

From 840ba65d927e674152e9e7a46bb525de90cf5bf7 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Wed, 25 Jan 2023 15:18:17 +0000
Subject: [PATCH 200/278] Changelog

---
 changelog.d/14912.misc | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/14912.misc

diff --git a/changelog.d/14912.misc b/changelog.d/14912.misc
new file mode 100644
index 00000000000..9dbc6b3424a
--- /dev/null
+++ b/changelog.d/14912.misc
@@ -0,0 +1 @@
+Faster joins: allow the resync process more time to fetch `/state` ids.

From 5d8486d43ebddf42245204501bd32855c93be7c4 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Wed, 25 Jan 2023 15:27:18 +0000
Subject: [PATCH 201/278] Milliseconds!!!!

---
 synapse/federation/transport/client.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py
index 730169f7229..682666ab360 100644
--- a/synapse/federation/transport/client.py
+++ b/synapse/federation/transport/client.py
@@ -103,9 +103,9 @@ async def get_room_state(
             path=path,
             args={"event_id": event_id},
             # This can take a looooooong time for large rooms. Give this a generous
-            # timeout, to avoid the partial state resync timing out early and trying
-            # a bunch of servers who haven't see our join yet.
-            timeout=600,
+            # timeout of 10 minutes to avoid the partial state resync timing out early
+            # and trying a bunch of servers who haven't seen our join yet.
+            timeout=600_000,
             parser=_StateParser(room_version),
         )
 

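A small aside on the unit fix: the transport timeout is measured in milliseconds, so
10 minutes is 10 * 60 * 1000 = 600,000 ms. Writing the value as a product (an
illustrative convention, not a change to the patch) keeps the unit visible:

    # Ten minutes, expressed in milliseconds; the product form makes the
    # seconds/milliseconds mix-up corrected above harder to reintroduce.
    STATE_REQUEST_TIMEOUT_MS = 10 * 60 * 1000
    assert STATE_REQUEST_TIMEOUT_MS == 600_000
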
From 204bdb7f07d4cb4a6cf5fda404071a4f0210bcbb Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 31 Jan 2023 11:09:27 +0000
Subject: [PATCH 202/278] Tag /send_join responses to detect faster joins

---
 synapse/federation/federation_server.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index 3197939a363..22f49033e01 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -63,6 +63,7 @@
 )
 from synapse.logging.opentracing import (
     log_kv,
+    set_tag,
     start_active_span_from_edu,
     tag_args,
     trace,
@@ -678,6 +679,7 @@ async def on_send_join_request(
         room_id: str,
         caller_supports_partial_state: bool = False,
     ) -> Dict[str, Any]:
+        set_tag("partial_state", caller_supports_partial_state)
         await self._room_member_handler._join_rate_per_room_limiter.ratelimit(  # type: ignore[has-type]
             requester=None,
             key=room_id,

From 488cea03990234c5319654c3c8f600888779107a Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 31 Jan 2023 15:24:34 +0000
Subject: [PATCH 203/278] Revert "Tag /send_join responses to detect faster
 joins"

This reverts commit 204bdb7f07d4cb4a6cf5fda404071a4f0210bcbb. I will
cherry pick #14950 in its place.
---
 synapse/federation/federation_server.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index 22f49033e01..3197939a363 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -63,7 +63,6 @@
 )
 from synapse.logging.opentracing import (
     log_kv,
-    set_tag,
     start_active_span_from_edu,
     tag_args,
     trace,
@@ -679,7 +678,6 @@ async def on_send_join_request(
         room_id: str,
         caller_supports_partial_state: bool = False,
     ) -> Dict[str, Any]:
-        set_tag("partial_state", caller_supports_partial_state)
         await self._room_member_handler._join_rate_per_room_limiter.ratelimit(  # type: ignore[has-type]
             requester=None,
             key=room_id,

From 873cf5114916d9a98e72701fcf38e567f29ed73e Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 31 Jan 2023 12:43:20 +0000
Subject: [PATCH 204/278] Tag /send_join responses to detect faster joins
 (#14950)

* Tag /send_join responses to detect faster joins

* Changelog

* Define a proper SynapseTag

* isort
---
 changelog.d/14950.misc                  | 1 +
 synapse/federation/federation_server.py | 6 ++++++
 synapse/logging/opentracing.py          | 5 +++++
 3 files changed, 12 insertions(+)
 create mode 100644 changelog.d/14950.misc

diff --git a/changelog.d/14950.misc b/changelog.d/14950.misc
new file mode 100644
index 00000000000..6602776b3ff
--- /dev/null
+++ b/changelog.d/14950.misc
@@ -0,0 +1 @@
+Faster joins: tag `v2/send_join/` requests to indicate if they served a partial join response.
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index 3197939a363..c9a6dfd1a4b 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -62,7 +62,9 @@
     run_in_background,
 )
 from synapse.logging.opentracing import (
+    SynapseTags,
     log_kv,
+    set_tag,
     start_active_span_from_edu,
     tag_args,
     trace,
@@ -678,6 +680,10 @@ async def on_send_join_request(
         room_id: str,
         caller_supports_partial_state: bool = False,
     ) -> Dict[str, Any]:
+        set_tag(
+            SynapseTags.SEND_JOIN_RESPONSE_IS_PARTIAL_STATE,
+            caller_supports_partial_state,
+        )
         await self._room_member_handler._join_rate_per_room_limiter.ratelimit(  # type: ignore[has-type]
             requester=None,
             key=room_id,
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py
index a705af83565..8ef9a0dda8e 100644
--- a/synapse/logging/opentracing.py
+++ b/synapse/logging/opentracing.py
@@ -322,6 +322,11 @@ class SynapseTags:
     # The name of the external cache
     CACHE_NAME = "cache.name"
 
+    # Boolean. Present on /v2/send_join requests, omitted from all others.
+    # True iff partial state was requested and we provided (or intended to provide)
+    # partial state in the response.
+    SEND_JOIN_RESPONSE_IS_PARTIAL_STATE = "send_join.partial_state_response"
+
     # Used to tag function arguments
     #
     # Tag a named arg. The name of the argument should be appended to this prefix.

From d6e40d75ff887b411500072414b3748f42db47af Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 31 Jan 2023 15:28:28 +0000
Subject: [PATCH 205/278] HACK: force tracing for partial state /send_join
 responses

---
 synapse/federation/federation_server.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index c9a6dfd1a4b..0df2266de3b 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -63,6 +63,7 @@
 )
 from synapse.logging.opentracing import (
     SynapseTags,
+    force_tracing,
     log_kv,
     set_tag,
     start_active_span_from_edu,
@@ -684,6 +685,10 @@ async def on_send_join_request(
             SynapseTags.SEND_JOIN_RESPONSE_IS_PARTIAL_STATE,
             caller_supports_partial_state,
         )
+        # TEMPORARY HACK: always gather partial join traces, to see if we can find low-
+        # hanging fruit for making them faster.
+        if caller_supports_partial_state:
+            force_tracing()
         await self._room_member_handler._join_rate_per_room_limiter.ratelimit(  # type: ignore[has-type]
             requester=None,
             key=room_id,

From e03b1e858bddb4c0acb6ab728cc5d93d6cb39a82 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Wed, 1 Feb 2023 12:27:22 +0000
Subject: [PATCH 206/278] Fix my hack

The tag isn't set unless we force tracing, apparently
---
 synapse/federation/federation_server.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index 0df2266de3b..087910dc3c4 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -681,14 +681,14 @@ async def on_send_join_request(
         room_id: str,
         caller_supports_partial_state: bool = False,
     ) -> Dict[str, Any]:
-        set_tag(
-            SynapseTags.SEND_JOIN_RESPONSE_IS_PARTIAL_STATE,
-            caller_supports_partial_state,
-        )
         # TEMPORARY HACK: always gather partial join traces, to see if we can find low-
         # hanging fruit for making them faster.
         if caller_supports_partial_state:
             force_tracing()
+        set_tag(
+            SynapseTags.SEND_JOIN_RESPONSE_IS_PARTIAL_STATE,
+            caller_supports_partial_state,
+        )
         await self._room_member_handler._join_rate_per_room_limiter.ratelimit(  # type: ignore[has-type]
             requester=None,
             key=room_id,
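
The reordering works because, as the commit message notes, a tag attached to a span
that is not being sampled can be silently dropped; forcing tracing first ensures the
tag survives. A self-contained sketch of that behaviour with a toy tracer (not the
real OpenTracing API):

    class ToyTracer:
        """Toy tracer: tags set while the span is unsampled are discarded."""

        def __init__(self) -> None:
            self.sampled = False
            self.tags: dict = {}

        def force_tracing(self) -> None:
            self.sampled = True

        def set_tag(self, key: str, value: object) -> None:
            if self.sampled:
                self.tags[key] = value

    tracer = ToyTracer()
    tracer.force_tracing()  # must happen first ...
    tracer.set_tag("send_join.partial_state_response", True)  # ... or this is lost
    assert tracer.tags == {"send_join.partial_state_response": True}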

From 769406378c6f307213685309c60b15fe291ee142 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Wed, 1 Feb 2023 15:19:15 +0000
Subject: [PATCH 207/278] Cherry-pick "Fetch fewer events when getting hosts in
 room" (WIP #14962)

This is a squashed version of the following commits:

- bc739a444fb5bffc6dd3b57399a223bc5b5b4a15
- 70947443798e995ff026819cf5dd6823134ac81a
- f781304e1882fd60cdef84d3405767773237629d
- 7ee3976debf925fc2f79381c0c51003588e31e32
---
 changelog.d/14962.feature                    |  1 +
 synapse/storage/databases/main/roommember.py | 47 +++++++++++++++++++-
 2 files changed, 46 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/14962.feature

diff --git a/changelog.d/14962.feature b/changelog.d/14962.feature
new file mode 100644
index 00000000000..38f26012f23
--- /dev/null
+++ b/changelog.d/14962.feature
@@ -0,0 +1 @@
+Improve performance when joining or sending an event in large rooms.
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py
index 8e2ba7b7b47..49dbd34767e 100644
--- a/synapse/storage/databases/main/roommember.py
+++ b/synapse/storage/databases/main/roommember.py
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
+from itertools import chain
 from typing import (
     TYPE_CHECKING,
     AbstractSet,
@@ -1131,12 +1132,33 @@ async def _get_joined_hosts(
             else:
                 # The cache doesn't match the state group or prev state group,
                 # so we calculate the result from first principles.
+                #
+                # We need to fetch all hosts joined to the room according to `state` by
+                # inspecting all join memberships in `state`. However, if the `state` is
+                # relatively recent then many of its events are likely to be held in
+                # the current state of the room, which is easily available and likely
+                # cached.
+                #
+                # We therefore compute the set of `state` events not in the
+                # current state and only fetch those.
+                current_memberships = (
+                    await self._get_approximate_current_memberships_in_room(room_id)
+                )
+                unknown_state_events = {}
+                joined_users_in_current_state = []
+
+                for (type, state_key), event_id in state.items():
+                    if event_id not in current_memberships:
+                        unknown_state_events[type, state_key] = event_id
+                    elif current_memberships[event_id] == Membership.JOIN:
+                        joined_users_in_current_state.append(state_key)
+
                 joined_user_ids = await self.get_joined_user_ids_from_state(
-                    room_id, state
+                    room_id, unknown_state_events
                 )
 
                 cache.hosts_to_joined_users = {}
-                for user_id in joined_user_ids:
+                for user_id in chain(joined_user_ids, joined_users_in_current_state):
                     host = intern_string(get_domain_from_id(user_id))
                     cache.hosts_to_joined_users.setdefault(host, set()).add(user_id)
 
@@ -1147,6 +1169,27 @@ async def _get_joined_hosts(
 
         return frozenset(cache.hosts_to_joined_users)
 
+    # TODO: this _might_ turn out to need caching, let's see
+    async def _get_approximate_current_memberships_in_room(
+        self, room_id: str
+    ) -> Mapping[str, Optional[str]]:
+        """Build a map from event id to membership, for all events in the current state.
+
+        The event ids of non-membership events (e.g. `m.room.power_levels`) are present
+        in the result, mapped to values of `None`.
+
+        The result is approximate for partially-joined rooms. It is fully accurate
+        for fully-joined rooms.
+        """
+
+        rows = await self.db_pool.simple_select_list(
+            "current_state_events",
+            keyvalues={"room_id": room_id},
+            retcols=("event_id", "membership"),
+            desc="has_completed_background_updates",
+        )
+        return {row["event_id"]: row["membership"] for row in rows}
+
     @cached(max_entries=10000)
     def _get_joined_hosts_cache(self, room_id: str) -> "_JoinedHostsCache":
         return _JoinedHostsCache()
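
To restate the partitioning step above outside the storage layer: membership events
that already appear in the cheaply available (and likely cached) current state do not
need to be fetched again, so the state map is split before the remaining events are
looked up. A standalone sketch with toy data (names and types local to this example):

    from typing import Dict, List, Optional, Tuple

    StateKey = Tuple[str, str]  # (event type, state key)

    def split_state(
        state: Dict[StateKey, str],
        current_memberships: Dict[str, Optional[str]],
    ) -> Tuple[Dict[StateKey, str], List[str]]:
        unknown_state_events: Dict[StateKey, str] = {}
        joined_users_in_current_state: List[str] = []
        for (etype, state_key), event_id in state.items():
            if event_id not in current_memberships:
                # Not in the current state: this event still has to be fetched.
                unknown_state_events[(etype, state_key)] = event_id
            elif current_memberships[event_id] == "join":
                # Already known to be a join: no further fetch needed.
                joined_users_in_current_state.append(state_key)
        return unknown_state_events, joined_users_in_current_state

    unknown, joined = split_state(
        {("m.room.member", "@a:x"): "$1", ("m.room.member", "@b:y"): "$2"},
        {"$1": "join"},
    )
    assert unknown == {("m.room.member", "@b:y"): "$2"}
    assert joined == ["@a:x"]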

From a7fa951876a98869123ad2128b911d02aa42dd20 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Thu, 2 Feb 2023 15:28:32 +0000
Subject: [PATCH 208/278] Only notify the target of a membership event

Naughty, but should be a big speedup in large rooms

Changelog
---
 changelog.d/14971.misc                   |  1 +
 synapse/push/bulk_push_rule_evaluator.py | 29 ++++++++++++++++--------
 2 files changed, 21 insertions(+), 9 deletions(-)
 create mode 100644 changelog.d/14971.misc

diff --git a/changelog.d/14971.misc b/changelog.d/14971.misc
new file mode 100644
index 00000000000..130045a1237
--- /dev/null
+++ b/changelog.d/14971.misc
@@ -0,0 +1 @@
+Improve performance of joining and leaving large rooms with many local users.
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index f27ba64d536..bc1cf7bfba0 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -129,15 +129,26 @@ async def _get_rules_for_event(
         Returns:
             Mapping of user ID to their push rules.
         """
-        # We get the users who may need to be notified by first fetching the
-        # local users currently in the room, finding those that have push rules,
-        # and *then* checking which users are actually allowed to see the event.
-        #
-        # The alternative is to first fetch all users that were joined at the
-        # event, but that requires fetching the full state at the event, which
-        # may be expensive for large rooms with few local users.
-
-        local_users = await self.store.get_local_users_in_room(event.room_id)
+        # If this is a membership event, only calculate push rules for the target.
+        # While it's possible for users to configure push rules to respond to such an
+        # event, in practise nobody does this. At the cost of violating the spec a
+        # little, we can skip fetching a huge number of push rules in large rooms.
+        # This helps make joins and leaves faster.
+        if event.type == EventTypes.Member:
+            if self.hs.is_mine_id(event.state_key):
+                local_users = [event.state_key]
+            else:
+                return {}
+        else:
+            # We get the users who may need to be notified by first fetching the
+            # local users currently in the room, finding those that have push rules,
+            # and *then* checking which users are actually allowed to see the event.
+            #
+            # The alternative is to first fetch all users that were joined at the
+            # event, but that requires fetching the full state at the event, which
+            # may be expensive for large rooms with few local users.
+
+            local_users = await self.store.get_local_users_in_room(event.room_id)
 
         # Filter out appservice users.
         local_users = [

From ea3210f8e53772fd3a259e8feddd1afef460ad46 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Thu, 2 Feb 2023 16:24:38 +0000
Subject: [PATCH 209/278] Don't notify a target who's not in the room

Also avoid unnecessary DB queries when creating a room
---
 synapse/push/bulk_push_rule_evaluator.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index bc1cf7bfba0..bc3a0e6d61d 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -135,8 +135,17 @@ async def _get_rules_for_event(
         # little, we can skip fetching a huge number of push rules in large rooms.
         # This helps make joins and leaves faster.
         if event.type == EventTypes.Member:
-            if self.hs.is_mine_id(event.state_key):
-                local_users = [event.state_key]
+            # We never notify a user about their own actions. This is enforced in
+            # `_action_for_event_by_user` in the loop over `rules_by_user`, but we
+            # do the same check here to avoid unnecessary DB queries.
+            if event.sender != event.state_key and self.hs.is_mine_id(event.state_key):
+                # Check the target is in the room, to avoid notifying them of
+                # e.g. a pre-emptive ban.
+                target_already_in_room = await self.store.check_local_user_in_room(
+                    event.state_key, event.room_id
+                )
+                if target_already_in_room:
+                    local_users = [event.state_key]
             else:
                 return {}
         else:

From c9564e08ba698771d59602d48b2f6b2cf389c5a9 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Thu, 2 Feb 2023 16:47:39 +0000
Subject: [PATCH 210/278] Fix stupid bug

---
 synapse/push/bulk_push_rule_evaluator.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index bc3a0e6d61d..adc89c534d5 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -135,6 +135,7 @@ async def _get_rules_for_event(
         # little, we can skip fetching a huge number of push rules in large rooms.
         # This helps make joins and leaves faster.
         if event.type == EventTypes.Member:
+            local_users = []
             # We never notify a user about their own actions. This is enforced in
             # `_action_for_event_by_user` in the loop over `rules_by_user`, but we
             # do the same check here to avoid unnecessary DB queries.
@@ -146,8 +147,6 @@ async def _get_rules_for_event(
                 )
                 if target_already_in_room:
                     local_users = [event.state_key]
-            else:
-                return {}
         else:
             # We get the users who may need to be notified by first fetching the
             # local users currently in the room, finding those that have push rules,
@@ -174,6 +173,9 @@ async def _get_rules_for_event(
                 local_users = list(local_users)
                 local_users.append(invited)
 
+        if not local_users:
+            return {}
+
         rules_by_user = await self.store.bulk_get_push_rules(local_users)
 
         logger.debug("Users in room: %s", local_users)
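
Taken together, the last three commits boil down to one decision about whose push
rules are evaluated for a membership event. A condensed sketch with the store and
homeserver lookups replaced by plain booleans (a hypothetical helper, not the real
signature):

    from typing import List, Optional

    def users_to_evaluate(
        event_type: str,
        sender: str,
        state_key: Optional[str],
        target_is_local: bool,
        target_in_room: bool,
        local_users_in_room: List[str],
    ) -> List[str]:
        if event_type == "m.room.member":
            # Only the (local) membership target can be notified: never the sender
            # about their own action, and never a target who is not yet in the room.
            if (
                state_key is not None
                and sender != state_key
                and target_is_local
                and target_in_room
            ):
                return [state_key]
            return []
        # Any other event type: start from all local users in the room.
        return list(local_users_in_room)

    assert users_to_evaluate("m.room.member", "@a:x", "@b:x", True, True, []) == ["@b:x"]
    assert users_to_evaluate("m.room.member", "@a:x", "@a:x", True, True, []) == []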

From 0f63c73cf7c59483312230028f62a3986e67708a Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 2 Feb 2023 11:58:20 -0500
Subject: [PATCH 211/278] Do not calculate presence or ephemeral events when
 they are filtered out (#14970)

This expands the previous optimisation from being only for initial
sync to being for all sync requests.

It also inverts some of the logic to be inclusive instead of exclusive.
---
 changelog.d/14970.misc   |  1 +
 synapse/handlers/sync.py | 19 +++++++++----------
 2 files changed, 10 insertions(+), 10 deletions(-)
 create mode 100644 changelog.d/14970.misc

diff --git a/changelog.d/14970.misc b/changelog.d/14970.misc
new file mode 100644
index 00000000000..36576236028
--- /dev/null
+++ b/changelog.d/14970.misc
@@ -0,0 +1 @@
+Improve performance of `/sync` in a few situations.
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 5ebd3ea855c..93af99a391c 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -1459,10 +1459,12 @@ async def generate_sync_result(
             sync_result_builder, account_data_by_room
         )
 
-        block_all_presence_data = (
-            since_token is None and sync_config.filter_collection.blocks_all_presence()
+        # Presence data is included if the server has it enabled and not filtered out.
+        include_presence_data = (
+            self.hs_config.server.use_presence
+            and not sync_config.filter_collection.blocks_all_presence()
         )
-        if self.hs_config.server.use_presence and not block_all_presence_data:
+        if include_presence_data:
             logger.debug("Fetching presence data")
             await self._generate_sync_entry_for_presence(
                 sync_result_builder,
@@ -1841,15 +1843,12 @@ async def _generate_sync_entry_for_rooms(
         """
 
         since_token = sync_result_builder.since_token
-
-        # 1. Start by fetching all ephemeral events in rooms we've joined (if required).
         user_id = sync_result_builder.sync_config.user.to_string()
-        block_all_room_ephemeral = (
-            since_token is None
-            and sync_result_builder.sync_config.filter_collection.blocks_all_room_ephemeral()
-        )
 
-        if block_all_room_ephemeral:
+        # 1. Start by fetching all ephemeral events in rooms we've joined (if required).
+        if (
+            sync_result_builder.sync_config.filter_collection.blocks_all_room_ephemeral()
+        ):
             ephemeral_by_room: Dict[str, List[JsonDict]] = {}
         else:
             now_token, ephemeral_by_room = await self.ephemeral_by_room(

From 306ecd674051d084486d8043f46b50dc38d8e493 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 2 Feb 2023 13:45:12 -0500
Subject: [PATCH 212/278] Skip unused calculations in sync handler. (#14908)

If a sync request does not need to calculate per-room entries, is not
generating presence, and is not generating device list data (e.g. during
initial sync), avoid the expensive calculation of room-specific data.

This is a micro-optimisation for clients syncing simply to receive
to-device information.
---
 changelog.d/14908.misc   |   1 +
 synapse/api/filtering.py |   3 +
 synapse/handlers/sync.py | 258 ++++++++++++++++++++-------------------
 3 files changed, 137 insertions(+), 125 deletions(-)
 create mode 100644 changelog.d/14908.misc

diff --git a/changelog.d/14908.misc b/changelog.d/14908.misc
new file mode 100644
index 00000000000..36576236028
--- /dev/null
+++ b/changelog.d/14908.misc
@@ -0,0 +1 @@
+Improve performance of `/sync` in a few situations.
diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py
index 4cf8f0cc8ef..2b5af264b43 100644
--- a/synapse/api/filtering.py
+++ b/synapse/api/filtering.py
@@ -283,6 +283,9 @@ async def filter_room_account_data(
             await self._room_filter.filter(events)
         )
 
+    def blocks_all_rooms(self) -> bool:
+        return self._room_filter.filters_all_rooms()
+
     def blocks_all_presence(self) -> bool:
         return (
             self._presence_filter.filters_all_types()
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 93af99a391c..007ef6e44de 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -1448,41 +1448,67 @@ async def generate_sync_result(
             sync_result_builder
         )
 
-        logger.debug("Fetching room data")
-
-        (
-            newly_joined_rooms,
-            newly_joined_or_invited_or_knocked_users,
-            newly_left_rooms,
-            newly_left_users,
-        ) = await self._generate_sync_entry_for_rooms(
-            sync_result_builder, account_data_by_room
-        )
-
         # Presence data is included if the server has it enabled and not filtered out.
-        include_presence_data = (
+        include_presence_data = bool(
             self.hs_config.server.use_presence
             and not sync_config.filter_collection.blocks_all_presence()
         )
-        if include_presence_data:
-            logger.debug("Fetching presence data")
-            await self._generate_sync_entry_for_presence(
-                sync_result_builder,
+        # Device list updates are sent if a since token is provided.
+        include_device_list_updates = bool(since_token and since_token.device_list_key)
+
+        # If we do not care about the rooms or things which depend on the room
+        # data (namely presence and device list updates), then we can skip
+        # this process completely.
+        device_lists = DeviceListUpdates()
+        if (
+            not sync_result_builder.sync_config.filter_collection.blocks_all_rooms()
+            or include_presence_data
+            or include_device_list_updates
+        ):
+            logger.debug("Fetching room data")
+
+            # Note that _generate_sync_entry_for_rooms sets sync_result_builder.joined, which
+            # is used in calculate_user_changes below.
+            (
                 newly_joined_rooms,
-                newly_joined_or_invited_or_knocked_users,
+                newly_left_rooms,
+            ) = await self._generate_sync_entry_for_rooms(
+                sync_result_builder, account_data_by_room
             )
 
+            # Work out which users have joined or left rooms we're in. We use this
+            # to build the presence and device_list parts of the sync response in
+            # `_generate_sync_entry_for_presence` and
+            # `_generate_sync_entry_for_device_list` respectively.
+            if include_presence_data or include_device_list_updates:
+                # This uses the sync_result_builder.joined which is set in
+                # `_generate_sync_entry_for_rooms`, if that didn't find any joined
+                # rooms for some reason it is a no-op.
+                (
+                    newly_joined_or_invited_or_knocked_users,
+                    newly_left_users,
+                ) = sync_result_builder.calculate_user_changes()
+
+                if include_presence_data:
+                    logger.debug("Fetching presence data")
+                    await self._generate_sync_entry_for_presence(
+                        sync_result_builder,
+                        newly_joined_rooms,
+                        newly_joined_or_invited_or_knocked_users,
+                    )
+
+                if include_device_list_updates:
+                    device_lists = await self._generate_sync_entry_for_device_list(
+                        sync_result_builder,
+                        newly_joined_rooms=newly_joined_rooms,
+                        newly_joined_or_invited_or_knocked_users=newly_joined_or_invited_or_knocked_users,
+                        newly_left_rooms=newly_left_rooms,
+                        newly_left_users=newly_left_users,
+                    )
+
         logger.debug("Fetching to-device data")
         await self._generate_sync_entry_for_to_device(sync_result_builder)
 
-        device_lists = await self._generate_sync_entry_for_device_list(
-            sync_result_builder,
-            newly_joined_rooms=newly_joined_rooms,
-            newly_joined_or_invited_or_knocked_users=newly_joined_or_invited_or_knocked_users,
-            newly_left_rooms=newly_left_rooms,
-            newly_left_users=newly_left_users,
-        )
-
         logger.debug("Fetching OTK data")
         device_id = sync_config.device_id
         one_time_keys_count: JsonDict = {}
@@ -1551,6 +1577,7 @@ async def _generate_sync_entry_for_device_list(
 
         user_id = sync_result_builder.sync_config.user.to_string()
         since_token = sync_result_builder.since_token
+        assert since_token is not None
 
         # Take a copy since these fields will be mutated later.
         newly_joined_or_invited_or_knocked_users = set(
@@ -1558,92 +1585,85 @@ async def _generate_sync_entry_for_device_list(
         )
         newly_left_users = set(newly_left_users)
 
-        if since_token and since_token.device_list_key:
-            # We want to figure out what user IDs the client should refetch
-            # device keys for, and which users we aren't going to track changes
-            # for anymore.
-            #
-            # For the first step we check:
-            #   a. if any users we share a room with have updated their devices,
-            #      and
-            #   b. we also check if we've joined any new rooms, or if a user has
-            #      joined a room we're in.
-            #
-            # For the second step we just find any users we no longer share a
-            # room with by looking at all users that have left a room plus users
-            # that were in a room we've left.
+        # We want to figure out what user IDs the client should refetch
+        # device keys for, and which users we aren't going to track changes
+        # for anymore.
+        #
+        # For the first step we check:
+        #   a. if any users we share a room with have updated their devices,
+        #      and
+        #   b. we also check if we've joined any new rooms, or if a user has
+        #      joined a room we're in.
+        #
+        # For the second step we just find any users we no longer share a
+        # room with by looking at all users that have left a room plus users
+        # that were in a room we've left.
 
-            users_that_have_changed = set()
+        users_that_have_changed = set()
 
-            joined_rooms = sync_result_builder.joined_room_ids
+        joined_rooms = sync_result_builder.joined_room_ids
 
-            # Step 1a, check for changes in devices of users we share a room
-            # with
-            #
-            # We do this in two different ways depending on what we have cached.
-            # If we already have a list of all the user that have changed since
-            # the last sync then it's likely more efficient to compare the rooms
-            # they're in with the rooms the syncing user is in.
-            #
-            # If we don't have that info cached then we get all the users that
-            # share a room with our user and check if those users have changed.
-            cache_result = self.store.get_cached_device_list_changes(
-                since_token.device_list_key
-            )
-            if cache_result.hit:
-                changed_users = cache_result.entities
-
-                result = await self.store.get_rooms_for_users(changed_users)
-
-                for changed_user_id, entries in result.items():
-                    # Check if the changed user shares any rooms with the user,
-                    # or if the changed user is the syncing user (as we always
-                    # want to include device list updates of their own devices).
-                    if user_id == changed_user_id or any(
-                        rid in joined_rooms for rid in entries
-                    ):
-                        users_that_have_changed.add(changed_user_id)
-            else:
-                users_that_have_changed = (
-                    await self._device_handler.get_device_changes_in_shared_rooms(
-                        user_id,
-                        sync_result_builder.joined_room_ids,
-                        from_token=since_token,
-                    )
-                )
-
-            # Step 1b, check for newly joined rooms
-            for room_id in newly_joined_rooms:
-                joined_users = await self.store.get_users_in_room(room_id)
-                newly_joined_or_invited_or_knocked_users.update(joined_users)
+        # Step 1a, check for changes in devices of users we share a room
+        # with
+        #
+        # We do this in two different ways depending on what we have cached.
+        # If we already have a list of all the user that have changed since
+        # the last sync then it's likely more efficient to compare the rooms
+        # they're in with the rooms the syncing user is in.
+        #
+        # If we don't have that info cached then we get all the users that
+        # share a room with our user and check if those users have changed.
+        cache_result = self.store.get_cached_device_list_changes(
+            since_token.device_list_key
+        )
+        if cache_result.hit:
+            changed_users = cache_result.entities
 
-            # TODO: Check that these users are actually new, i.e. either they
-            # weren't in the previous sync *or* they left and rejoined.
-            users_that_have_changed.update(newly_joined_or_invited_or_knocked_users)
+            result = await self.store.get_rooms_for_users(changed_users)
 
-            user_signatures_changed = (
-                await self.store.get_users_whose_signatures_changed(
-                    user_id, since_token.device_list_key
+            for changed_user_id, entries in result.items():
+                # Check if the changed user shares any rooms with the user,
+                # or if the changed user is the syncing user (as we always
+                # want to include device list updates of their own devices).
+                if user_id == changed_user_id or any(
+                    rid in joined_rooms for rid in entries
+                ):
+                    users_that_have_changed.add(changed_user_id)
+        else:
+            users_that_have_changed = (
+                await self._device_handler.get_device_changes_in_shared_rooms(
+                    user_id,
+                    sync_result_builder.joined_room_ids,
+                    from_token=since_token,
                 )
             )
-            users_that_have_changed.update(user_signatures_changed)
 
-            # Now find users that we no longer track
-            for room_id in newly_left_rooms:
-                left_users = await self.store.get_users_in_room(room_id)
-                newly_left_users.update(left_users)
+        # Step 1b, check for newly joined rooms
+        for room_id in newly_joined_rooms:
+            joined_users = await self.store.get_users_in_room(room_id)
+            newly_joined_or_invited_or_knocked_users.update(joined_users)
 
-            # Remove any users that we still share a room with.
-            left_users_rooms = await self.store.get_rooms_for_users(newly_left_users)
-            for user_id, entries in left_users_rooms.items():
-                if any(rid in joined_rooms for rid in entries):
-                    newly_left_users.discard(user_id)
+        # TODO: Check that these users are actually new, i.e. either they
+        # weren't in the previous sync *or* they left and rejoined.
+        users_that_have_changed.update(newly_joined_or_invited_or_knocked_users)
 
-            return DeviceListUpdates(
-                changed=users_that_have_changed, left=newly_left_users
-            )
-        else:
-            return DeviceListUpdates()
+        user_signatures_changed = await self.store.get_users_whose_signatures_changed(
+            user_id, since_token.device_list_key
+        )
+        users_that_have_changed.update(user_signatures_changed)
+
+        # Now find users that we no longer track
+        for room_id in newly_left_rooms:
+            left_users = await self.store.get_users_in_room(room_id)
+            newly_left_users.update(left_users)
+
+        # Remove any users that we still share a room with.
+        left_users_rooms = await self.store.get_rooms_for_users(newly_left_users)
+        for user_id, entries in left_users_rooms.items():
+            if any(rid in joined_rooms for rid in entries):
+                newly_left_users.discard(user_id)
+
+        return DeviceListUpdates(changed=users_that_have_changed, left=newly_left_users)
 
     @trace
     async def _generate_sync_entry_for_to_device(
@@ -1720,6 +1740,7 @@ async def _generate_sync_entry_for_account_data(
         since_token = sync_result_builder.since_token
 
         if since_token and not sync_result_builder.full_state:
+            # TODO Do not fetch room account data if it will be unused.
             (
                 global_account_data,
                 account_data_by_room,
@@ -1736,6 +1757,7 @@ async def _generate_sync_entry_for_account_data(
                     sync_config.user
                 )
         else:
+            # TODO Do not fetch room account data if it will be unused.
             (
                 global_account_data,
                 account_data_by_room,
@@ -1818,7 +1840,7 @@ async def _generate_sync_entry_for_rooms(
         self,
         sync_result_builder: "SyncResultBuilder",
         account_data_by_room: Dict[str, Dict[str, JsonDict]],
-    ) -> Tuple[AbstractSet[str], AbstractSet[str], AbstractSet[str], AbstractSet[str]]:
+    ) -> Tuple[AbstractSet[str], AbstractSet[str]]:
         """Generates the rooms portion of the sync response. Populates the
         `sync_result_builder` with the result.
 
@@ -1831,24 +1853,22 @@ async def _generate_sync_entry_for_rooms(
             account_data_by_room: Dictionary of per room account data
 
         Returns:
-            Returns a 4-tuple describing rooms the user has joined or left, and users who've
-            joined or left rooms any rooms the user is in. This gets used later in
-            `_generate_sync_entry_for_device_list`.
+            Returns a 2-tuple describing rooms the user has joined or left.
 
             Its entries are:
             - newly_joined_rooms
-            - newly_joined_or_invited_or_knocked_users
             - newly_left_rooms
-            - newly_left_users
         """
 
         since_token = sync_result_builder.since_token
         user_id = sync_result_builder.sync_config.user.to_string()
 
         # 1. Start by fetching all ephemeral events in rooms we've joined (if required).
-        if (
-            sync_result_builder.sync_config.filter_collection.blocks_all_room_ephemeral()
-        ):
+        block_all_room_ephemeral = (
+            sync_result_builder.sync_config.filter_collection.blocks_all_rooms()
+            or sync_result_builder.sync_config.filter_collection.blocks_all_room_ephemeral()
+        )
+        if block_all_room_ephemeral:
             ephemeral_by_room: Dict[str, List[JsonDict]] = {}
         else:
             now_token, ephemeral_by_room = await self.ephemeral_by_room(
@@ -1870,7 +1890,7 @@ async def _generate_sync_entry_for_rooms(
                     )
                     if not tags_by_room:
                         logger.debug("no-oping sync")
-                        return set(), set(), set(), set()
+                        return set(), set()
 
         # 3. Work out which rooms need reporting in the sync response.
         ignored_users = await self.store.ignored_users(user_id)
@@ -1899,6 +1919,7 @@ async def _generate_sync_entry_for_rooms(
         # joined or archived).
         async def handle_room_entries(room_entry: "RoomSyncResultBuilder") -> None:
             logger.debug("Generating room entry for %s", room_entry.room_id)
+            # Note that this mutates sync_result_builder.{joined,archived}.
             await self._generate_room_entry(
                 sync_result_builder,
                 room_entry,
@@ -1915,20 +1936,7 @@ async def handle_room_entries(room_entry: "RoomSyncResultBuilder") -> None:
         sync_result_builder.invited.extend(invited)
         sync_result_builder.knocked.extend(knocked)
 
-        # 5. Work out which users have joined or left rooms we're in. We use this
-        # to build the device_list part of the sync response in
-        # `_generate_sync_entry_for_device_list`.
-        (
-            newly_joined_or_invited_or_knocked_users,
-            newly_left_users,
-        ) = sync_result_builder.calculate_user_changes()
-
-        return (
-            set(newly_joined_rooms),
-            newly_joined_or_invited_or_knocked_users,
-            set(newly_left_rooms),
-            newly_left_users,
-        )
+        return set(newly_joined_rooms), set(newly_left_rooms)
 
     async def _have_rooms_changed(
         self, sync_result_builder: "SyncResultBuilder"

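As a hedged aside on the device-list patch above: the bookkeeping it introduces boils down to set arithmetic over room membership. The sketch below (plain Python with invented names, not the actual SyncHandler API) shows the shape of that computation: a user with changed devices is reported if we still share a room with them (the real code also always includes the syncing user's own devices), newly joined/invited/knocked users are always reported, and users from newly left rooms are reported as "left" only if we no longer share any room with them.

    def device_list_delta(
        changed_users_rooms,   # hypothetical: {user_id: set of rooms they are in}
        joined_rooms,          # set of rooms the syncing user is currently in
        newly_joined_users,    # users who joined, were invited, or knocked
        left_users_rooms,      # {user_id: set of rooms} for users in newly left rooms
    ):
        # Device changes only matter if we still share a room with that user.
        changed = {
            uid for uid, rooms in changed_users_rooms.items()
            if rooms & joined_rooms
        }
        # Newly joined/invited/knocked users always need their device lists fetched.
        changed |= newly_joined_users
        # Users from rooms we left count as "left" unless we still share another room.
        left = {
            uid for uid, rooms in left_users_rooms.items()
            if not (rooms & joined_rooms)
        }
        return changed, left
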
From 4d2d701840e0265663dbd58a344ca561c20ea718 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Fri, 10 Feb 2023 18:22:26 +0000
Subject: [PATCH 213/278] Clean up old changelog files

---
 changelog.d/11583.bugfix  | 1 -
 changelog.d/14374.bugfix  | 1 -
 changelog.d/14716.misc    | 1 -
 changelog.d/14718.misc    | 1 -
 changelog.d/14908.misc    | 1 -
 changelog.d/14912.misc    | 1 -
 changelog.d/14950.misc    | 1 -
 changelog.d/14962.feature | 1 -
 changelog.d/14970.misc    | 1 -
 changelog.d/14971.misc    | 1 -
 10 files changed, 10 deletions(-)
 delete mode 100644 changelog.d/11583.bugfix
 delete mode 100644 changelog.d/14374.bugfix
 delete mode 100644 changelog.d/14716.misc
 delete mode 100644 changelog.d/14718.misc
 delete mode 100644 changelog.d/14908.misc
 delete mode 100644 changelog.d/14912.misc
 delete mode 100644 changelog.d/14950.misc
 delete mode 100644 changelog.d/14962.feature
 delete mode 100644 changelog.d/14970.misc
 delete mode 100644 changelog.d/14971.misc

diff --git a/changelog.d/11583.bugfix b/changelog.d/11583.bugfix
deleted file mode 100644
index d2ed113e212..00000000000
--- a/changelog.d/11583.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix a performance regression in `/sync` handling, introduced in 1.49.0.
diff --git a/changelog.d/14374.bugfix b/changelog.d/14374.bugfix
deleted file mode 100644
index 8226f5b6511..00000000000
--- a/changelog.d/14374.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix a background database update which could cause poor database performance.
diff --git a/changelog.d/14716.misc b/changelog.d/14716.misc
deleted file mode 100644
index ef9522e01db..00000000000
--- a/changelog.d/14716.misc
+++ /dev/null
@@ -1 +0,0 @@
-Batch up replication requests to request the resyncing of remote users's devices.
\ No newline at end of file
diff --git a/changelog.d/14718.misc b/changelog.d/14718.misc
deleted file mode 100644
index cda3ededd16..00000000000
--- a/changelog.d/14718.misc
+++ /dev/null
@@ -1 +0,0 @@
-Revert update of hiredis in Poetry lockfile: revert from 2.1.0 to 2.0.0.
\ No newline at end of file
diff --git a/changelog.d/14908.misc b/changelog.d/14908.misc
deleted file mode 100644
index 36576236028..00000000000
--- a/changelog.d/14908.misc
+++ /dev/null
@@ -1 +0,0 @@
-Improve performance of `/sync` in a few situations.
diff --git a/changelog.d/14912.misc b/changelog.d/14912.misc
deleted file mode 100644
index 9dbc6b3424a..00000000000
--- a/changelog.d/14912.misc
+++ /dev/null
@@ -1 +0,0 @@
-Faster joins: allow the resync process more time to fetch `/state` ids.
diff --git a/changelog.d/14950.misc b/changelog.d/14950.misc
deleted file mode 100644
index 6602776b3ff..00000000000
--- a/changelog.d/14950.misc
+++ /dev/null
@@ -1 +0,0 @@
-Faster joins: tag `v2/send_join/` requests to indicate if they served a partial join response.
diff --git a/changelog.d/14962.feature b/changelog.d/14962.feature
deleted file mode 100644
index 38f26012f23..00000000000
--- a/changelog.d/14962.feature
+++ /dev/null
@@ -1 +0,0 @@
-Improve performance when joining or sending an event large rooms.
diff --git a/changelog.d/14970.misc b/changelog.d/14970.misc
deleted file mode 100644
index 36576236028..00000000000
--- a/changelog.d/14970.misc
+++ /dev/null
@@ -1 +0,0 @@
-Improve performance of `/sync` in a few situations.
diff --git a/changelog.d/14971.misc b/changelog.d/14971.misc
deleted file mode 100644
index 130045a1237..00000000000
--- a/changelog.d/14971.misc
+++ /dev/null
@@ -1 +0,0 @@
-Improve performance of joining and leaving large rooms with many local users.

From e984b6f9dd5bc5cc696132955ea6d0a87500ae55 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Fri, 10 Feb 2023 18:24:15 +0000
Subject: [PATCH 214/278] Use changelog from release branch

---
 CHANGES.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGES.md b/CHANGES.md
index 0a8b0a38982..a2cb957f169 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -2002,7 +2002,7 @@ Internal Changes
 Synapse 1.59.1 (2022-05-18)
 ===========================
 
-This release fixes a long-standing issue which could prevent Synapse's user directory from updating properly.
+This release fixes a long-standing issue which could prevent Synapse's user directory for updating properly.
 
 Bugfixes
 ----------------

From 6bb4db78cef8420f4de1eac7e9a98bc2d413c97e Mon Sep 17 00:00:00 2001
From: "Olivier Wilkinson (reivilibre)" <oliverw@matrix.org>
Date: Tue, 21 Feb 2023 20:03:45 +0000
Subject: [PATCH 215/278] MORG HOTFIXES: increase /state response body size
 limit to 600MB

See #15127.
---
 synapse/federation/transport/client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py
index c05d598b70c..9204084c674 100644
--- a/synapse/federation/transport/client.py
+++ b/synapse/federation/transport/client.py
@@ -971,7 +971,7 @@ class _StateParser(ByteParser[StateRequestResponse]):
     CONTENT_TYPE = "application/json"
 
     # As with /send_join, /state responses can be huge.
-    MAX_RESPONSE_SIZE = 500 * 1024 * 1024
+    MAX_RESPONSE_SIZE = 600 * 1024 * 1024
 
     def __init__(self, room_version: RoomVersion):
         self._response = StateRequestResponse([], [])

From ab629c17ccfbc98e8297e7fd3fcf03b641bd3cc0 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 7 Mar 2023 19:49:14 +0000
Subject: [PATCH 216/278] Hack to workaround libera federation pain

as seen in https://github.com/matrix-org/synapse/issues/15216
---
 synapse/federation/sender/per_destination_queue.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py
index ffc9d95ee70..9bfa0bb7579 100644
--- a/synapse/federation/sender/per_destination_queue.py
+++ b/synapse/federation/sender/per_destination_queue.py
@@ -509,6 +509,17 @@ async def _catch_up_transmission_loop(self) -> None:
                 # servers, but the remote will correctly deduplicate them and
                 # handle it only once.
 
+                # TEMPORARY HACK: the loop body below can block during partial state
+                # resyncs. This is bad---other, fully joined rooms have their federation
+                # sending nobbled. As a stopgap, ignore partial state rooms here.
+                # Any queued messages in rooms that we skip won't be sent to this
+                # destination; they'll wait for us to send a new event in the room.
+                if await self._store.is_partial_state_room(pdu.room_id):
+                    logger.warning(
+                        "SKIPPING CATCHUP FOR PARTIAL STATE ROOM: %s", pdu.room_id
+                    )
+                    continue
+
                 # Step 1, fetch the current extremities
                 extrems = await self._store.get_prev_events_for_room(pdu.room_id)
 

From b4df6be0d00d10b791c6c2bffd7dfa0269aba2c7 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Mon, 13 Mar 2023 12:44:50 +0000
Subject: [PATCH 217/278] Revert "Hack to workaround libera federation pain"

This reverts commit ab629c17ccfbc98e8297e7fd3fcf03b641bd3cc0.
---
 synapse/federation/sender/per_destination_queue.py | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py
index 9bfa0bb7579..ffc9d95ee70 100644
--- a/synapse/federation/sender/per_destination_queue.py
+++ b/synapse/federation/sender/per_destination_queue.py
@@ -509,17 +509,6 @@ async def _catch_up_transmission_loop(self) -> None:
                 # servers, but the remote will correctly deduplicate them and
                 # handle it only once.
 
-                # TEMPORARY HACK: the loop body below can block during partial state
-                # resyncs. This is bad---other, fully joined rooms have their federation
-                # sending nobbled. As a stopgap, ignore partial state rooms here.
-                # Any queued messages in rooms that we skip won't be sent to this
-                # destination; they'll wait for us to send a new event in the room.
-                if await self._store.is_partial_state_room(pdu.room_id):
-                    logger.warning(
-                        "SKIPPING CATCHUP FOR PARTIAL STATE ROOM: %s", pdu.room_id
-                    )
-                    continue
-
                 # Step 1, fetch the current extremities
                 extrems = await self._store.get_prev_events_for_room(pdu.room_id)
 

From 2263c3bcd7d95f6fe6e7eaf1a5a43ff5a04a2d9b Mon Sep 17 00:00:00 2001
From: Andrew Morgan <andrew@amorgan.xyz>
Date: Wed, 10 May 2023 18:12:34 +0100
Subject: [PATCH 218/278] Redirect event persistence traffic to a particular
 event persister worker

---
 synapse/config/_base.py   |  8 ++++++++
 synapse/config/_base.pyi  |  5 ++++-
 synapse/config/workers.py | 13 ++++++++++++-
 3 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/synapse/config/_base.py b/synapse/config/_base.py
index 2ce60610ca6..5697000ee6c 100644
--- a/synapse/config/_base.py
+++ b/synapse/config/_base.py
@@ -947,6 +947,10 @@ class ShardedWorkerHandlingConfig:
 
     instances: List[str]
 
+    # A map of key to instance name. If any of these keys are used,
+    # the associated instance is *always* returned.
+    instances_reserved_for_keys: Dict[str, str] = {}
+
     def should_handle(self, instance_name: str, key: str) -> bool:
         """Whether this instance is responsible for handling the given key."""
         # If no instances are defined we assume some other worker is handling
@@ -964,6 +968,10 @@ def _get_instance(self, key: str) -> str:
         method by default.
         """
 
+        reserved_instance = self.instances_reserved_for_keys.get(key)
+        if reserved_instance is not None:
+            return reserved_instance
+
         if not self.instances:
             raise Exception("Unknown worker")
 
diff --git a/synapse/config/_base.pyi b/synapse/config/_base.pyi
index b5cec132b4c..f5966c26874 100644
--- a/synapse/config/_base.pyi
+++ b/synapse/config/_base.pyi
@@ -199,7 +199,10 @@ def find_config_files(search_paths: List[str]) -> List[str]: ...
 
 class ShardedWorkerHandlingConfig:
     instances: List[str]
-    def __init__(self, instances: List[str]) -> None: ...
+    instances_reserved_for_keys: Dict[str, str]
+    def __init__(
+        self, instances: List[str], instances_reserved_for_keys: Dict[str, str] = {}
+    ) -> None: ...
     def should_handle(self, instance_name: str, key: str) -> bool: ...  # noqa: F811
 
 class RoutableShardedWorkerHandlingConfig(ShardedWorkerHandlingConfig):
diff --git a/synapse/config/workers.py b/synapse/config/workers.py
index 95b4047f1d3..09fefffa2b7 100644
--- a/synapse/config/workers.py
+++ b/synapse/config/workers.py
@@ -272,8 +272,19 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None:
                 "Must only specify one instance to handle `presence` messages."
             )
 
+        # Make the event shard config point specific rooms to a specific worker.
+        # All other rooms should have events persisted by a different worker.
+        chosen_worker_instance = "event_persister-4"
+        event_instances = self.writers.events.copy()
+        event_instances.remove(chosen_worker_instance)
+
+        instances_reserved_for_keys = {
+            "!ioWEdTBHIhOGYVKWyq:libera.chat": chosen_worker_instance,
+            "!bBgnAGciIvrtPXkHkp:libera.chat": chosen_worker_instance,
+        }
+
         self.events_shard_config = RoutableShardedWorkerHandlingConfig(
-            self.writers.events
+            event_instances, instances_reserved_for_keys
         )
 
         # Handle sharded push

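For orientation, the routing idea in this patch can be sketched in a few lines: reserved keys (here, two busy libera.chat room IDs) always go to their pinned worker, and every other key falls back to sharding over the remaining instances. This is only an illustrative stand-in; Synapse's real _get_instance uses its own hashing scheme rather than the SHA-256 bucketing shown here.

    import hashlib
    from typing import Dict, List

    def pick_instance(key: str, instances: List[str], reserved: Dict[str, str]) -> str:
        # Pinned keys always go to their reserved instance.
        if key in reserved:
            return reserved[key]
        # Everything else is bucketed by a stable hash of the key.
        digest = int(hashlib.sha256(key.encode("utf-8")).hexdigest(), 16)
        return instances[digest % len(instances)]

    # With the configuration added above, the two reserved rooms map to
    # "event_persister-4" while all other rooms use the remaining event writers.
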
From 36b72b9f2e8051d71d3cd9655532f23f9b7283f7 Mon Sep 17 00:00:00 2001
From: Sean Quah <seanq@matrix.org>
Date: Fri, 12 May 2023 11:01:57 +0100
Subject: [PATCH 219/278] Revert "Redirect event persistence traffic to a
 particular event persister worker"

Now that things have settled down in the two rooms we special-cased, we
can return the sharding config to normal.

Rich suspects that event_persister-4 being idle was the cause of
federation senders getting stuck on 2023-05-11. It was observed that the
min stream position of RoomStreamTokens was not advancing.

This reverts commit 2263c3bcd7d95f6fe6e7eaf1a5a43ff5a04a2d9b.
---
 synapse/config/_base.py   |  8 --------
 synapse/config/_base.pyi  |  5 +----
 synapse/config/workers.py | 13 +------------
 3 files changed, 2 insertions(+), 24 deletions(-)

diff --git a/synapse/config/_base.py b/synapse/config/_base.py
index 5697000ee6c..2ce60610ca6 100644
--- a/synapse/config/_base.py
+++ b/synapse/config/_base.py
@@ -947,10 +947,6 @@ class ShardedWorkerHandlingConfig:
 
     instances: List[str]
 
-    # A map of key to instance name. If any of these keys are used,
-    # the associated instance is *always* returned.
-    instances_reserved_for_keys: Dict[str, str] = {}
-
     def should_handle(self, instance_name: str, key: str) -> bool:
         """Whether this instance is responsible for handling the given key."""
         # If no instances are defined we assume some other worker is handling
@@ -968,10 +964,6 @@ def _get_instance(self, key: str) -> str:
         method by default.
         """
 
-        reserved_instance = self.instances_reserved_for_keys.get(key)
-        if reserved_instance is not None:
-            return reserved_instance
-
         if not self.instances:
             raise Exception("Unknown worker")
 
diff --git a/synapse/config/_base.pyi b/synapse/config/_base.pyi
index f5966c26874..b5cec132b4c 100644
--- a/synapse/config/_base.pyi
+++ b/synapse/config/_base.pyi
@@ -199,10 +199,7 @@ def find_config_files(search_paths: List[str]) -> List[str]: ...
 
 class ShardedWorkerHandlingConfig:
     instances: List[str]
-    instances_reserved_for_keys: Dict[str, str]
-    def __init__(
-        self, instances: List[str], instances_reserved_for_keys: Dict[str, str] = {}
-    ) -> None: ...
+    def __init__(self, instances: List[str]) -> None: ...
     def should_handle(self, instance_name: str, key: str) -> bool: ...  # noqa: F811
 
 class RoutableShardedWorkerHandlingConfig(ShardedWorkerHandlingConfig):
diff --git a/synapse/config/workers.py b/synapse/config/workers.py
index 09fefffa2b7..95b4047f1d3 100644
--- a/synapse/config/workers.py
+++ b/synapse/config/workers.py
@@ -272,19 +272,8 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None:
                 "Must only specify one instance to handle `presence` messages."
             )
 
-        # Make the event shard config point specific rooms to a specific worker.
-        # All other rooms should have events persisted by a different worker.
-        chosen_worker_instance = "event_persister-4"
-        event_instances = self.writers.events.copy()
-        event_instances.remove(chosen_worker_instance)
-
-        instances_reserved_for_keys = {
-            "!ioWEdTBHIhOGYVKWyq:libera.chat": chosen_worker_instance,
-            "!bBgnAGciIvrtPXkHkp:libera.chat": chosen_worker_instance,
-        }
-
         self.events_shard_config = RoutableShardedWorkerHandlingConfig(
-            event_instances, instances_reserved_for_keys
+            self.writers.events
         )
 
         # Handle sharded push

From 93248076c2a0eecc631a7ce7503f3d5e773e224c Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 15 Sep 2023 15:45:41 +0100
Subject: [PATCH 220/278] Temporarily disable webp thumbnailing

---
 synapse/media/thumbnailer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/media/thumbnailer.py b/synapse/media/thumbnailer.py
index d8979813b33..359b3233efa 100644
--- a/synapse/media/thumbnailer.py
+++ b/synapse/media/thumbnailer.py
@@ -51,7 +51,7 @@ def __init__(self, input_path: str):
         self._closed = False
 
         try:
-            self.image = Image.open(input_path)
+            self.image = Image.open(input_path, formats=["jpeg", "gif", "png"])
         except OSError as e:
             # If an error occurs opening the image, a thumbnail won't be able to
             # be generated.

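A brief note on why the one-line change above is enough to disable webp thumbnailing: Pillow's Image.open accepts a formats allow-list (Pillow 7.2+), and a file outside that list raises UnidentifiedImageError, which is a subclass of OSError, so the pre-existing "except OSError" branch treats it as an image that cannot be thumbnailed. A minimal, illustrative sketch (not Synapse code):

    from PIL import Image, UnidentifiedImageError

    def try_open(path: str):
        try:
            return Image.open(path, formats=["jpeg", "gif", "png"])
        except UnidentifiedImageError:
            # webp (or anything else outside the allow-list) lands here; since
            # UnidentifiedImageError subclasses OSError, the handler in the
            # patched Thumbnailer catches it and no thumbnail is generated.
            return None
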
From e60993e42d860329de8fa210081004c77018f966 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <patrickc@matrix.org>
Date: Mon, 25 Sep 2023 08:12:05 -0400
Subject: [PATCH 221/278] Revert "Temporarily disable webp thumbnailing"

This reverts commit 93248076c2a0eecc631a7ce7503f3d5e773e224c.
---
 synapse/media/thumbnailer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/media/thumbnailer.py b/synapse/media/thumbnailer.py
index 359b3233efa..d8979813b33 100644
--- a/synapse/media/thumbnailer.py
+++ b/synapse/media/thumbnailer.py
@@ -51,7 +51,7 @@ def __init__(self, input_path: str):
         self._closed = False
 
         try:
-            self.image = Image.open(input_path, formats=["jpeg", "gif", "png"])
+            self.image = Image.open(input_path)
         except OSError as e:
             # If an error occurs opening the image, a thumbnail won't be able to
             # be generated.

From 96769a229cc249973926848a2410c0c60f9b0c16 Mon Sep 17 00:00:00 2001
From: "Olivier Wilkinson (reivilibre)" <oliverw@matrix.org>
Date: Mon, 16 Oct 2023 17:37:58 +0100
Subject: [PATCH 222/278] TEMPORARY Disable prune_old_outbound_device_pokes

See #16480
---
 synapse/storage/databases/main/devices.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index df596f35f9b..a8206c6afe7 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -160,7 +160,9 @@ def __init__(
             prefilled_cache=device_list_federation_prefill,
         )
 
-        if hs.config.worker.run_background_tasks:
+        # vdh,rei 2023-10-13: disable because it is eating DB
+        # https://github.com/matrix-org/synapse/issues/16480
+        if False and hs.config.worker.run_background_tasks:
             self._clock.looping_call(
                 self._prune_old_outbound_device_pokes, 60 * 60 * 1000
             )

From f3db863420ff2093359d828839952950c0d15584 Mon Sep 17 00:00:00 2001
From: "Olivier Wilkinson (reivilibre)" <oliverw@matrix.org>
Date: Mon, 16 Oct 2023 17:55:05 +0100
Subject: [PATCH 223/278] TEMPORARY Subdivide _resolve_events Measure blocks

---
 synapse/state/v2.py | 171 ++++++++++++++++++++++++--------------------
 1 file changed, 92 insertions(+), 79 deletions(-)

diff --git a/synapse/state/v2.py b/synapse/state/v2.py
index b2e63aed1eb..e224af8dd85 100644
--- a/synapse/state/v2.py
+++ b/synapse/state/v2.py
@@ -38,6 +38,7 @@
 from synapse.api.room_versions import RoomVersion
 from synapse.events import EventBase
 from synapse.types import MutableStateMap, StateMap, StrCollection
+from synapse.util.metrics import Measure
 
 logger = logging.getLogger(__name__)
 
@@ -104,106 +105,118 @@ async def resolve_events_with_store(
     Returns:
         A map from (type, state_key) to event_id.
     """
+    with Measure(clock, "rei_state_res:rews2_a"):  # TODO temporary (rei)
+        logger.debug("Computing conflicted state")
 
-    logger.debug("Computing conflicted state")
+        # We use event_map as a cache, so if its None we need to initialize it
+        if event_map is None:
+            event_map = {}
 
-    # We use event_map as a cache, so if its None we need to initialize it
-    if event_map is None:
-        event_map = {}
+        # First split up the un/conflicted state
+        unconflicted_state, conflicted_state = _seperate(state_sets)
 
-    # First split up the un/conflicted state
-    unconflicted_state, conflicted_state = _seperate(state_sets)
+        if not conflicted_state:
+            return unconflicted_state
 
-    if not conflicted_state:
-        return unconflicted_state
+        logger.debug("%d conflicted state entries", len(conflicted_state))
+        logger.debug("Calculating auth chain difference")
 
-    logger.debug("%d conflicted state entries", len(conflicted_state))
-    logger.debug("Calculating auth chain difference")
+        # Also fetch all auth events that appear in only some of the state sets'
+        # auth chains.
+        auth_diff = await _get_auth_chain_difference(
+            room_id, state_sets, event_map, state_res_store
+        )
 
-    # Also fetch all auth events that appear in only some of the state sets'
-    # auth chains.
-    auth_diff = await _get_auth_chain_difference(
-        room_id, state_sets, event_map, state_res_store
-    )
+    with Measure(clock, "rei_state_res:rews2_b"):  # TODO temporary (rei)
+        full_conflicted_set = set(
+            itertools.chain(
+                itertools.chain.from_iterable(conflicted_state.values()), auth_diff
+            )
+        )
 
-    full_conflicted_set = set(
-        itertools.chain(
-            itertools.chain.from_iterable(conflicted_state.values()), auth_diff
+        events = await state_res_store.get_events(
+            [eid for eid in full_conflicted_set if eid not in event_map],
+            allow_rejected=True,
         )
-    )
+        event_map.update(events)
 
-    events = await state_res_store.get_events(
-        [eid for eid in full_conflicted_set if eid not in event_map],
-        allow_rejected=True,
-    )
-    event_map.update(events)
-
-    # everything in the event map should be in the right room
-    for event in event_map.values():
-        if event.room_id != room_id:
-            raise Exception(
-                "Attempting to state-resolve for room %s with event %s which is in %s"
-                % (
-                    room_id,
-                    event.event_id,
-                    event.room_id,
+    with Measure(clock, "rei_state_res:rews2_c"):  # TODO temporary (rei)
+        # everything in the event map should be in the right room
+        for event in event_map.values():
+            if event.room_id != room_id:
+                raise Exception(
+                    "Attempting to state-resolve for room %s with event %s which is in %s"
+                    % (
+                        room_id,
+                        event.event_id,
+                        event.room_id,
+                    )
                 )
-            )
-
-    full_conflicted_set = {eid for eid in full_conflicted_set if eid in event_map}
 
-    logger.debug("%d full_conflicted_set entries", len(full_conflicted_set))
+        full_conflicted_set = {eid for eid in full_conflicted_set if eid in event_map}
 
-    # Get and sort all the power events (kicks/bans/etc)
-    power_events = (
-        eid for eid in full_conflicted_set if _is_power_event(event_map[eid])
-    )
+        logger.debug("%d full_conflicted_set entries", len(full_conflicted_set))
 
-    sorted_power_events = await _reverse_topological_power_sort(
-        clock, room_id, power_events, event_map, state_res_store, full_conflicted_set
-    )
+        # Get and sort all the power events (kicks/bans/etc)
+        power_events = (
+            eid for eid in full_conflicted_set if _is_power_event(event_map[eid])
+        )
 
-    logger.debug("sorted %d power events", len(sorted_power_events))
-
-    # Now sequentially auth each one
-    resolved_state = await _iterative_auth_checks(
-        clock,
-        room_id,
-        room_version,
-        sorted_power_events,
-        unconflicted_state,
-        event_map,
-        state_res_store,
-    )
+    with Measure(clock, "rei_state_res:rews2_d"):  # TODO temporary (rei)
+        sorted_power_events = await _reverse_topological_power_sort(
+            clock,
+            room_id,
+            power_events,
+            event_map,
+            state_res_store,
+            full_conflicted_set,
+        )
 
-    logger.debug("resolved power events")
+        logger.debug("sorted %d power events", len(sorted_power_events))
+
+    with Measure(clock, "rei_state_res:rews2_e"):  # TODO temporary (rei)
+        # Now sequentially auth each one
+        resolved_state = await _iterative_auth_checks(
+            clock,
+            room_id,
+            room_version,
+            sorted_power_events,
+            unconflicted_state,
+            event_map,
+            state_res_store,
+        )
 
-    # OK, so we've now resolved the power events. Now sort the remaining
-    # events using the mainline of the resolved power level.
+        logger.debug("resolved power events")
 
-    set_power_events = set(sorted_power_events)
-    leftover_events = [
-        ev_id for ev_id in full_conflicted_set if ev_id not in set_power_events
-    ]
+    with Measure(clock, "rei_state_res:rews2_f"):  # TODO temporary (rei)
+        # OK, so we've now resolved the power events. Now sort the remaining
+        # events using the mainline of the resolved power level.
 
-    logger.debug("sorting %d remaining events", len(leftover_events))
+        set_power_events = set(sorted_power_events)
+        leftover_events = [
+            ev_id for ev_id in full_conflicted_set if ev_id not in set_power_events
+        ]
 
-    pl = resolved_state.get((EventTypes.PowerLevels, ""), None)
-    leftover_events = await _mainline_sort(
-        clock, room_id, leftover_events, pl, event_map, state_res_store
-    )
+        logger.debug("sorting %d remaining events", len(leftover_events))
 
-    logger.debug("resolving remaining events")
+    with Measure(clock, "rei_state_res:rews2_g"):  # TODO temporary (rei)
+        pl = resolved_state.get((EventTypes.PowerLevels, ""), None)
+        leftover_events = await _mainline_sort(
+            clock, room_id, leftover_events, pl, event_map, state_res_store
+        )
 
-    resolved_state = await _iterative_auth_checks(
-        clock,
-        room_id,
-        room_version,
-        leftover_events,
-        resolved_state,
-        event_map,
-        state_res_store,
-    )
+    with Measure(clock, "rei_state_res:rews2_h"):  # TODO temporary (rei)
+        logger.debug("resolving remaining events")
+
+        resolved_state = await _iterative_auth_checks(
+            clock,
+            room_id,
+            room_version,
+            leftover_events,
+            resolved_state,
+            event_map,
+            state_res_store,
+        )
 
     logger.debug("resolved")
 

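As a hedged aside, the Measure blocks added in this patch follow a simple pattern: wrap each stage of a long function in a named context manager so per-stage timings can be attributed. Synapse's real synapse.util.metrics.Measure records considerably more (CPU and database usage, Prometheus metrics, as the later "Measure and log test cases" patch relies on); the stand-in below only times wall clock, purely to illustrate the shape of the change.

    import time
    from contextlib import contextmanager
    from typing import Iterator

    @contextmanager
    def measure_block(name: str) -> Iterator[None]:
        # Time a named block; the real Measure also tracks resource usage.
        start = time.monotonic()
        try:
            yield
        finally:
            print(f"{name}: {time.monotonic() - start:.3f}s")

    # Usage mirrors the diff above:
    #     with measure_block("rei_state_res:rews2_a"):
    #         ...  # compute conflicted state
    #     with measure_block("rei_state_res:rews2_b"):
    #         ...  # gather the full conflicted set
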
From adfa0fded3f3f8330788417107decc53cac3c49c Mon Sep 17 00:00:00 2001
From: "Olivier Wilkinson (reivilibre)" <oliverw@matrix.org>
Date: Mon, 16 Oct 2023 18:15:48 +0100
Subject: [PATCH 224/278] TEMPORARY Add more Measure blocks

---
 synapse/state/v2.py | 134 +++++++++++++++++++++++---------------------
 1 file changed, 70 insertions(+), 64 deletions(-)

diff --git a/synapse/state/v2.py b/synapse/state/v2.py
index e224af8dd85..408d3754399 100644
--- a/synapse/state/v2.py
+++ b/synapse/state/v2.py
@@ -124,7 +124,7 @@ async def resolve_events_with_store(
         # Also fetch all auth events that appear in only some of the state sets'
         # auth chains.
         auth_diff = await _get_auth_chain_difference(
-            room_id, state_sets, event_map, state_res_store
+            room_id, state_sets, event_map, state_res_store, clock
         )
 
     with Measure(clock, "rei_state_res:rews2_b"):  # TODO temporary (rei)
@@ -284,6 +284,7 @@ async def _get_auth_chain_difference(
     state_sets: Sequence[StateMap[str]],
     unpersisted_events: Dict[str, EventBase],
     state_res_store: StateResolutionStore,
+    clock: Clock,
 ) -> Set[str]:
     """Compare the auth chains of each state set and return the set of events
     that only appear in some, but not all of the auth chains.
@@ -315,77 +316,82 @@ async def _get_auth_chain_difference(
     # event IDs if they appear in the `unpersisted_events`. This is the intersection of
     # the event's auth chain with the events in `unpersisted_events` *plus* their
     # auth event IDs.
-    events_to_auth_chain: Dict[str, Set[str]] = {}
-    for event in unpersisted_events.values():
-        chain = {event.event_id}
-        events_to_auth_chain[event.event_id] = chain
-
-        to_search = [event]
-        while to_search:
-            for auth_id in to_search.pop().auth_event_ids():
-                chain.add(auth_id)
-                auth_event = unpersisted_events.get(auth_id)
-                if auth_event:
-                    to_search.append(auth_event)
+    with Measure(clock, "rei_state_res:rews2_a1"):  # TODO temporary (rei)
+        events_to_auth_chain: Dict[str, Set[str]] = {}
+        for event in unpersisted_events.values():
+            chain = {event.event_id}
+            events_to_auth_chain[event.event_id] = chain
+
+            to_search = [event]
+            while to_search:
+                for auth_id in to_search.pop().auth_event_ids():
+                    chain.add(auth_id)
+                    auth_event = unpersisted_events.get(auth_id)
+                    if auth_event:
+                        to_search.append(auth_event)
 
     # We now 1) calculate the auth chain difference for the unpersisted events
     # and 2) work out the state sets to pass to the store.
     #
     # Note: If there are no `unpersisted_events` (which is the common case), we can do a
     # much simpler calculation.
-    if unpersisted_events:
-        # The list of state sets to pass to the store, where each state set is a set
-        # of the event ids making up the state. This is similar to `state_sets`,
-        # except that (a) we only have event ids, not the complete
-        # ((type, state_key)->event_id) mappings; and (b) we have stripped out
-        # unpersisted events and replaced them with the persisted events in
-        # their auth chain.
-        state_sets_ids: List[Set[str]] = []
-
-        # For each state set, the unpersisted event IDs reachable (by their auth
-        # chain) from the events in that set.
-        unpersisted_set_ids: List[Set[str]] = []
-
-        for state_set in state_sets:
-            set_ids: Set[str] = set()
-            state_sets_ids.append(set_ids)
-
-            unpersisted_ids: Set[str] = set()
-            unpersisted_set_ids.append(unpersisted_ids)
-
-            for event_id in state_set.values():
-                event_chain = events_to_auth_chain.get(event_id)
-                if event_chain is not None:
-                    # We have an unpersisted event. We add all the auth
-                    # events that it references which are also unpersisted.
-                    set_ids.update(
-                        e for e in event_chain if e not in unpersisted_events
-                    )
-
-                    # We also add the full chain of unpersisted event IDs
-                    # referenced by this state set, so that we can work out the
-                    # auth chain difference of the unpersisted events.
-                    unpersisted_ids.update(
-                        e for e in event_chain if e in unpersisted_events
-                    )
-                else:
-                    set_ids.add(event_id)
-
-        # The auth chain difference of the unpersisted events of the state sets
-        # is calculated by taking the difference between the union and
-        # intersections.
-        union = unpersisted_set_ids[0].union(*unpersisted_set_ids[1:])
-        intersection = unpersisted_set_ids[0].intersection(*unpersisted_set_ids[1:])
+    with Measure(clock, "rei_state_res:rews2_a2"):  # TODO temporary (rei)
+        if unpersisted_events:
+            # The list of state sets to pass to the store, where each state set is a set
+            # of the event ids making up the state. This is similar to `state_sets`,
+            # except that (a) we only have event ids, not the complete
+            # ((type, state_key)->event_id) mappings; and (b) we have stripped out
+            # unpersisted events and replaced them with the persisted events in
+            # their auth chain.
+            state_sets_ids: List[Set[str]] = []
+
+            # For each state set, the unpersisted event IDs reachable (by their auth
+            # chain) from the events in that set.
+            unpersisted_set_ids: List[Set[str]] = []
+
+            for state_set in state_sets:
+                set_ids: Set[str] = set()
+                state_sets_ids.append(set_ids)
+
+                unpersisted_ids: Set[str] = set()
+                unpersisted_set_ids.append(unpersisted_ids)
+
+                for event_id in state_set.values():
+                    event_chain = events_to_auth_chain.get(event_id)
+                    if event_chain is not None:
+                        # We have an unpersisted event. We add all the auth
+                        # events that it references which are also unpersisted.
+                        set_ids.update(
+                            e for e in event_chain if e not in unpersisted_events
+                        )
+
+                        # We also add the full chain of unpersisted event IDs
+                        # referenced by this state set, so that we can work out the
+                        # auth chain difference of the unpersisted events.
+                        unpersisted_ids.update(
+                            e for e in event_chain if e in unpersisted_events
+                        )
+                    else:
+                        set_ids.add(event_id)
+
+            # The auth chain difference of the unpersisted events of the state sets
+            # is calculated by taking the difference between the union and
+            # intersections.
+            union = unpersisted_set_ids[0].union(*unpersisted_set_ids[1:])
+            intersection = unpersisted_set_ids[0].intersection(*unpersisted_set_ids[1:])
+
+            auth_difference_unpersisted_part: StrCollection = union - intersection
+        else:
+            auth_difference_unpersisted_part = ()
+            state_sets_ids = [set(state_set.values()) for state_set in state_sets]
 
-        auth_difference_unpersisted_part: StrCollection = union - intersection
-    else:
-        auth_difference_unpersisted_part = ()
-        state_sets_ids = [set(state_set.values()) for state_set in state_sets]
+    with Measure(clock, "rei_state_res:rews2_a3"):  # TODO temporary (rei)
+        difference = await state_res_store.get_auth_chain_difference(
+            room_id, state_sets_ids
+        )
 
-    difference = await state_res_store.get_auth_chain_difference(
-        room_id, state_sets_ids
-    )
-    difference.update(auth_difference_unpersisted_part)
+    with Measure(clock, "rei_state_res:rews2_a4"):  # TODO temporary (rei)
+        difference.update(auth_difference_unpersisted_part)
 
     return difference
 

From 1e1cf4bb9dee8389a137c18da0251aab8ceab661 Mon Sep 17 00:00:00 2001
From: "Olivier Wilkinson (reivilibre)" <oliverw@matrix.org>
Date: Mon, 16 Oct 2023 18:24:45 +0100
Subject: [PATCH 225/278] Revert "TEMPORARY Add more Measure blocks"

This reverts commit adfa0fded3f3f8330788417107decc53cac3c49c.
---
 synapse/state/v2.py | 134 +++++++++++++++++++++-----------------------
 1 file changed, 64 insertions(+), 70 deletions(-)

diff --git a/synapse/state/v2.py b/synapse/state/v2.py
index 408d3754399..e224af8dd85 100644
--- a/synapse/state/v2.py
+++ b/synapse/state/v2.py
@@ -124,7 +124,7 @@ async def resolve_events_with_store(
         # Also fetch all auth events that appear in only some of the state sets'
         # auth chains.
         auth_diff = await _get_auth_chain_difference(
-            room_id, state_sets, event_map, state_res_store, clock
+            room_id, state_sets, event_map, state_res_store
         )
 
     with Measure(clock, "rei_state_res:rews2_b"):  # TODO temporary (rei)
@@ -284,7 +284,6 @@ async def _get_auth_chain_difference(
     state_sets: Sequence[StateMap[str]],
     unpersisted_events: Dict[str, EventBase],
     state_res_store: StateResolutionStore,
-    clock: Clock,
 ) -> Set[str]:
     """Compare the auth chains of each state set and return the set of events
     that only appear in some, but not all of the auth chains.
@@ -316,82 +315,77 @@ async def _get_auth_chain_difference(
     # event IDs if they appear in the `unpersisted_events`. This is the intersection of
     # the event's auth chain with the events in `unpersisted_events` *plus* their
     # auth event IDs.
-    with Measure(clock, "rei_state_res:rews2_a1"):  # TODO temporary (rei)
-        events_to_auth_chain: Dict[str, Set[str]] = {}
-        for event in unpersisted_events.values():
-            chain = {event.event_id}
-            events_to_auth_chain[event.event_id] = chain
-
-            to_search = [event]
-            while to_search:
-                for auth_id in to_search.pop().auth_event_ids():
-                    chain.add(auth_id)
-                    auth_event = unpersisted_events.get(auth_id)
-                    if auth_event:
-                        to_search.append(auth_event)
+    events_to_auth_chain: Dict[str, Set[str]] = {}
+    for event in unpersisted_events.values():
+        chain = {event.event_id}
+        events_to_auth_chain[event.event_id] = chain
+
+        to_search = [event]
+        while to_search:
+            for auth_id in to_search.pop().auth_event_ids():
+                chain.add(auth_id)
+                auth_event = unpersisted_events.get(auth_id)
+                if auth_event:
+                    to_search.append(auth_event)
 
     # We now 1) calculate the auth chain difference for the unpersisted events
     # and 2) work out the state sets to pass to the store.
     #
     # Note: If there are no `unpersisted_events` (which is the common case), we can do a
     # much simpler calculation.
-    with Measure(clock, "rei_state_res:rews2_a2"):  # TODO temporary (rei)
-        if unpersisted_events:
-            # The list of state sets to pass to the store, where each state set is a set
-            # of the event ids making up the state. This is similar to `state_sets`,
-            # except that (a) we only have event ids, not the complete
-            # ((type, state_key)->event_id) mappings; and (b) we have stripped out
-            # unpersisted events and replaced them with the persisted events in
-            # their auth chain.
-            state_sets_ids: List[Set[str]] = []
-
-            # For each state set, the unpersisted event IDs reachable (by their auth
-            # chain) from the events in that set.
-            unpersisted_set_ids: List[Set[str]] = []
-
-            for state_set in state_sets:
-                set_ids: Set[str] = set()
-                state_sets_ids.append(set_ids)
-
-                unpersisted_ids: Set[str] = set()
-                unpersisted_set_ids.append(unpersisted_ids)
-
-                for event_id in state_set.values():
-                    event_chain = events_to_auth_chain.get(event_id)
-                    if event_chain is not None:
-                        # We have an unpersisted event. We add all the auth
-                        # events that it references which are also unpersisted.
-                        set_ids.update(
-                            e for e in event_chain if e not in unpersisted_events
-                        )
-
-                        # We also add the full chain of unpersisted event IDs
-                        # referenced by this state set, so that we can work out the
-                        # auth chain difference of the unpersisted events.
-                        unpersisted_ids.update(
-                            e for e in event_chain if e in unpersisted_events
-                        )
-                    else:
-                        set_ids.add(event_id)
-
-            # The auth chain difference of the unpersisted events of the state sets
-            # is calculated by taking the difference between the union and
-            # intersections.
-            union = unpersisted_set_ids[0].union(*unpersisted_set_ids[1:])
-            intersection = unpersisted_set_ids[0].intersection(*unpersisted_set_ids[1:])
-
-            auth_difference_unpersisted_part: StrCollection = union - intersection
-        else:
-            auth_difference_unpersisted_part = ()
-            state_sets_ids = [set(state_set.values()) for state_set in state_sets]
+    if unpersisted_events:
+        # The list of state sets to pass to the store, where each state set is a set
+        # of the event ids making up the state. This is similar to `state_sets`,
+        # except that (a) we only have event ids, not the complete
+        # ((type, state_key)->event_id) mappings; and (b) we have stripped out
+        # unpersisted events and replaced them with the persisted events in
+        # their auth chain.
+        state_sets_ids: List[Set[str]] = []
+
+        # For each state set, the unpersisted event IDs reachable (by their auth
+        # chain) from the events in that set.
+        unpersisted_set_ids: List[Set[str]] = []
+
+        for state_set in state_sets:
+            set_ids: Set[str] = set()
+            state_sets_ids.append(set_ids)
+
+            unpersisted_ids: Set[str] = set()
+            unpersisted_set_ids.append(unpersisted_ids)
+
+            for event_id in state_set.values():
+                event_chain = events_to_auth_chain.get(event_id)
+                if event_chain is not None:
+                    # We have an unpersisted event. We add all the auth
+                    # events that it references which are also unpersisted.
+                    set_ids.update(
+                        e for e in event_chain if e not in unpersisted_events
+                    )
 
-    with Measure(clock, "rei_state_res:rews2_a3"):  # TODO temporary (rei)
-        difference = await state_res_store.get_auth_chain_difference(
-            room_id, state_sets_ids
-        )
+                    # We also add the full chain of unpersisted event IDs
+                    # referenced by this state set, so that we can work out the
+                    # auth chain difference of the unpersisted events.
+                    unpersisted_ids.update(
+                        e for e in event_chain if e in unpersisted_events
+                    )
+                else:
+                    set_ids.add(event_id)
+
+        # The auth chain difference of the unpersisted events of the state sets
+        # is calculated by taking the difference between the union and
+        # intersections.
+        union = unpersisted_set_ids[0].union(*unpersisted_set_ids[1:])
+        intersection = unpersisted_set_ids[0].intersection(*unpersisted_set_ids[1:])
+
+        auth_difference_unpersisted_part: StrCollection = union - intersection
+    else:
+        auth_difference_unpersisted_part = ()
+        state_sets_ids = [set(state_set.values()) for state_set in state_sets]
 
-    with Measure(clock, "rei_state_res:rews2_a4"):  # TODO temporary (rei)
-        difference.update(auth_difference_unpersisted_part)
+    difference = await state_res_store.get_auth_chain_difference(
+        room_id, state_sets_ids
+    )
+    difference.update(auth_difference_unpersisted_part)
 
     return difference
 

From 8c60ebf209643d0cf9523c4e274a7cbf48eee09a Mon Sep 17 00:00:00 2001
From: "Olivier Wilkinson (reivilibre)" <oliverw@matrix.org>
Date: Mon, 16 Oct 2023 18:24:46 +0100
Subject: [PATCH 226/278] Revert "TEMPORARY Subdivide _resolve_events Measure
 blocks"

This reverts commit f3db863420ff2093359d828839952950c0d15584.
---
 synapse/state/v2.py | 171 ++++++++++++++++++++------------------------
 1 file changed, 79 insertions(+), 92 deletions(-)

diff --git a/synapse/state/v2.py b/synapse/state/v2.py
index e224af8dd85..b2e63aed1eb 100644
--- a/synapse/state/v2.py
+++ b/synapse/state/v2.py
@@ -38,7 +38,6 @@
 from synapse.api.room_versions import RoomVersion
 from synapse.events import EventBase
 from synapse.types import MutableStateMap, StateMap, StrCollection
-from synapse.util.metrics import Measure
 
 logger = logging.getLogger(__name__)
 
@@ -105,118 +104,106 @@ async def resolve_events_with_store(
     Returns:
         A map from (type, state_key) to event_id.
     """
-    with Measure(clock, "rei_state_res:rews2_a"):  # TODO temporary (rei)
-        logger.debug("Computing conflicted state")
 
-        # We use event_map as a cache, so if its None we need to initialize it
-        if event_map is None:
-            event_map = {}
+    logger.debug("Computing conflicted state")
 
-        # First split up the un/conflicted state
-        unconflicted_state, conflicted_state = _seperate(state_sets)
+    # We use event_map as a cache, so if its None we need to initialize it
+    if event_map is None:
+        event_map = {}
 
-        if not conflicted_state:
-            return unconflicted_state
+    # First split up the un/conflicted state
+    unconflicted_state, conflicted_state = _seperate(state_sets)
 
-        logger.debug("%d conflicted state entries", len(conflicted_state))
-        logger.debug("Calculating auth chain difference")
+    if not conflicted_state:
+        return unconflicted_state
 
-        # Also fetch all auth events that appear in only some of the state sets'
-        # auth chains.
-        auth_diff = await _get_auth_chain_difference(
-            room_id, state_sets, event_map, state_res_store
-        )
+    logger.debug("%d conflicted state entries", len(conflicted_state))
+    logger.debug("Calculating auth chain difference")
 
-    with Measure(clock, "rei_state_res:rews2_b"):  # TODO temporary (rei)
-        full_conflicted_set = set(
-            itertools.chain(
-                itertools.chain.from_iterable(conflicted_state.values()), auth_diff
-            )
-        )
+    # Also fetch all auth events that appear in only some of the state sets'
+    # auth chains.
+    auth_diff = await _get_auth_chain_difference(
+        room_id, state_sets, event_map, state_res_store
+    )
 
-        events = await state_res_store.get_events(
-            [eid for eid in full_conflicted_set if eid not in event_map],
-            allow_rejected=True,
+    full_conflicted_set = set(
+        itertools.chain(
+            itertools.chain.from_iterable(conflicted_state.values()), auth_diff
         )
-        event_map.update(events)
+    )
 
-    with Measure(clock, "rei_state_res:rews2_c"):  # TODO temporary (rei)
-        # everything in the event map should be in the right room
-        for event in event_map.values():
-            if event.room_id != room_id:
-                raise Exception(
-                    "Attempting to state-resolve for room %s with event %s which is in %s"
-                    % (
-                        room_id,
-                        event.event_id,
-                        event.room_id,
-                    )
+    events = await state_res_store.get_events(
+        [eid for eid in full_conflicted_set if eid not in event_map],
+        allow_rejected=True,
+    )
+    event_map.update(events)
+
+    # everything in the event map should be in the right room
+    for event in event_map.values():
+        if event.room_id != room_id:
+            raise Exception(
+                "Attempting to state-resolve for room %s with event %s which is in %s"
+                % (
+                    room_id,
+                    event.event_id,
+                    event.room_id,
                 )
+            )
 
-        full_conflicted_set = {eid for eid in full_conflicted_set if eid in event_map}
+    full_conflicted_set = {eid for eid in full_conflicted_set if eid in event_map}
 
-        logger.debug("%d full_conflicted_set entries", len(full_conflicted_set))
+    logger.debug("%d full_conflicted_set entries", len(full_conflicted_set))
 
-        # Get and sort all the power events (kicks/bans/etc)
-        power_events = (
-            eid for eid in full_conflicted_set if _is_power_event(event_map[eid])
-        )
+    # Get and sort all the power events (kicks/bans/etc)
+    power_events = (
+        eid for eid in full_conflicted_set if _is_power_event(event_map[eid])
+    )
 
-    with Measure(clock, "rei_state_res:rews2_d"):  # TODO temporary (rei)
-        sorted_power_events = await _reverse_topological_power_sort(
-            clock,
-            room_id,
-            power_events,
-            event_map,
-            state_res_store,
-            full_conflicted_set,
-        )
+    sorted_power_events = await _reverse_topological_power_sort(
+        clock, room_id, power_events, event_map, state_res_store, full_conflicted_set
+    )
 
-        logger.debug("sorted %d power events", len(sorted_power_events))
-
-    with Measure(clock, "rei_state_res:rews2_e"):  # TODO temporary (rei)
-        # Now sequentially auth each one
-        resolved_state = await _iterative_auth_checks(
-            clock,
-            room_id,
-            room_version,
-            sorted_power_events,
-            unconflicted_state,
-            event_map,
-            state_res_store,
-        )
+    logger.debug("sorted %d power events", len(sorted_power_events))
+
+    # Now sequentially auth each one
+    resolved_state = await _iterative_auth_checks(
+        clock,
+        room_id,
+        room_version,
+        sorted_power_events,
+        unconflicted_state,
+        event_map,
+        state_res_store,
+    )
 
-        logger.debug("resolved power events")
+    logger.debug("resolved power events")
 
-    with Measure(clock, "rei_state_res:rews2_f"):  # TODO temporary (rei)
-        # OK, so we've now resolved the power events. Now sort the remaining
-        # events using the mainline of the resolved power level.
+    # OK, so we've now resolved the power events. Now sort the remaining
+    # events using the mainline of the resolved power level.
 
-        set_power_events = set(sorted_power_events)
-        leftover_events = [
-            ev_id for ev_id in full_conflicted_set if ev_id not in set_power_events
-        ]
+    set_power_events = set(sorted_power_events)
+    leftover_events = [
+        ev_id for ev_id in full_conflicted_set if ev_id not in set_power_events
+    ]
 
-        logger.debug("sorting %d remaining events", len(leftover_events))
+    logger.debug("sorting %d remaining events", len(leftover_events))
 
-    with Measure(clock, "rei_state_res:rews2_g"):  # TODO temporary (rei)
-        pl = resolved_state.get((EventTypes.PowerLevels, ""), None)
-        leftover_events = await _mainline_sort(
-            clock, room_id, leftover_events, pl, event_map, state_res_store
-        )
+    pl = resolved_state.get((EventTypes.PowerLevels, ""), None)
+    leftover_events = await _mainline_sort(
+        clock, room_id, leftover_events, pl, event_map, state_res_store
+    )
 
-    with Measure(clock, "rei_state_res:rews2_h"):  # TODO temporary (rei)
-        logger.debug("resolving remaining events")
-
-        resolved_state = await _iterative_auth_checks(
-            clock,
-            room_id,
-            room_version,
-            leftover_events,
-            resolved_state,
-            event_map,
-            state_res_store,
-        )
+    logger.debug("resolving remaining events")
+
+    resolved_state = await _iterative_auth_checks(
+        clock,
+        room_id,
+        room_version,
+        leftover_events,
+        resolved_state,
+        event_map,
+        state_res_store,
+    )
 
     logger.debug("resolved")
 

From adfdd6afe120d13ec6cd9d8a1d72b6fba5d8c9f8 Mon Sep 17 00:00:00 2001
From: "Olivier Wilkinson (reivilibre)" <oliverw@matrix.org>
Date: Tue, 17 Oct 2023 11:42:24 +0100
Subject: [PATCH 227/278] TEMPORARY Measure and log test cases

---
 .../databases/main/event_federation.py        | 23 ++++++++++++++-----
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index afffa549853..29fb5b7b663 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -54,6 +54,7 @@
 from synapse.util.caches.lrucache import LruCache
 from synapse.util.cancellation import cancellable
 from synapse.util.iterutils import batch_iter
+from synapse.util.metrics import Measure
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -413,12 +414,22 @@ async def get_auth_chain_difference(
         room = await self.get_room(room_id)  # type: ignore[attr-defined]
         if room["has_auth_chain_index"]:
             try:
-                return await self.db_pool.runInteraction(
-                    "get_auth_chain_difference_chains",
-                    self._get_auth_chain_difference_using_cover_index_txn,
-                    room_id,
-                    state_sets,
-                )
+                with Measure(self.hs.get_clock(), "rei:get_auth_chain_difference_chains") as m:
+                    r = await self.db_pool.runInteraction(
+                        "get_auth_chain_difference_chains",
+                        self._get_auth_chain_difference_using_cover_index_txn,
+                        room_id,
+                        state_sets,
+                    )
+                    if m.get_resource_usage().ru_utime > 4.0:
+                        logger.info(
+                            "REI-ACDC %.2f, ri=%r ss=%r",
+                            m.get_resource_usage().ru_utime,
+                            room_id,
+                            state_sets
+                        )
+                    return r
+
             except _NoChainCoverIndex:
                 # For whatever reason we don't actually have a chain cover index
                 # for the events in question, so we fall back to the old method.

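The temporary instrumentation above follows a reusable pattern: wrap the suspect block in `Measure` and only emit a log line when the measured CPU time crosses a threshold, so the common fast case stays quiet. A minimal sketch of that pattern, assuming the same `Measure(clock, name)` context manager and `get_resource_usage()` accessor used in the diff; `do_expensive_work` is a hypothetical stand-in for the real `runInteraction` call:

    import logging

    from synapse.util.metrics import Measure

    logger = logging.getLogger(__name__)

    async def measure_and_log(hs, do_expensive_work):
        # Time the block; Measure records metrics under the given block name.
        with Measure(hs.get_clock(), "rei:example_block") as m:
            result = await do_expensive_work()

            # Only log the slow cases (> 4s of user CPU), matching the patch.
            usage = m.get_resource_usage()
            if usage.ru_utime > 4.0:
                logger.info("slow block: %.2fs of user CPU", usage.ru_utime)

            return result
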
From e09b91b9d6854a060fc4532ae94c463b526801fa Mon Sep 17 00:00:00 2001
From: "Olivier Wilkinson (reivilibre)" <oliverw@matrix.org>
Date: Tue, 17 Oct 2023 11:44:57 +0100
Subject: [PATCH 228/278] Revert "TEMPORARY Measure and log test cases"

This reverts commit adfdd6afe120d13ec6cd9d8a1d72b6fba5d8c9f8.
---
 .../databases/main/event_federation.py        | 23 +++++--------------
 1 file changed, 6 insertions(+), 17 deletions(-)

diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index 29fb5b7b663..afffa549853 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -54,7 +54,6 @@
 from synapse.util.caches.lrucache import LruCache
 from synapse.util.cancellation import cancellable
 from synapse.util.iterutils import batch_iter
-from synapse.util.metrics import Measure
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -414,22 +413,12 @@ async def get_auth_chain_difference(
         room = await self.get_room(room_id)  # type: ignore[attr-defined]
         if room["has_auth_chain_index"]:
             try:
-                with Measure(self.hs.get_clock(), "rei:get_auth_chain_difference_chains") as m:
-                    r = await self.db_pool.runInteraction(
-                        "get_auth_chain_difference_chains",
-                        self._get_auth_chain_difference_using_cover_index_txn,
-                        room_id,
-                        state_sets,
-                    )
-                    if m.get_resource_usage().ru_utime > 4.0:
-                        logger.info(
-                            "REI-ACDC %.2f, ri=%r ss=%r",
-                            m.get_resource_usage().ru_utime,
-                            room_id,
-                            state_sets
-                        )
-                    return r
-
+                return await self.db_pool.runInteraction(
+                    "get_auth_chain_difference_chains",
+                    self._get_auth_chain_difference_using_cover_index_txn,
+                    room_id,
+                    state_sets,
+                )
             except _NoChainCoverIndex:
                 # For whatever reason we don't actually have a chain cover index
                 # for the events in question, so we fall back to the old method.

From 01b2477b1048f45f1d218359d7e8f7c2588205cf Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 16 Nov 2023 16:53:04 +0000
Subject: [PATCH 229/278] Revert "Fix test not detecting tables with missing
 primary keys and missing replica identities, then add more replica
 identities. (#16647)"

This reverts commit 830988ae72d63bbb67d2020a3f221664f3f456ee.
---
 changelog.d/16647.misc                        |  1 -
 .../07_common_replica_identities.sql.postgres | 30 -------
 .../06_more_replica_identities.sql.postgres   | 80 -------------------
 3 files changed, 111 deletions(-)
 delete mode 100644 changelog.d/16647.misc
 delete mode 100644 synapse/storage/schema/common/delta/83/07_common_replica_identities.sql.postgres
 delete mode 100644 synapse/storage/schema/main/delta/83/06_more_replica_identities.sql.postgres

diff --git a/changelog.d/16647.misc b/changelog.d/16647.misc
deleted file mode 100644
index baee042f2f2..00000000000
--- a/changelog.d/16647.misc
+++ /dev/null
@@ -1 +0,0 @@
-Add a Postgres `REPLICA IDENTITY` to tables that do not have an implicit one. This should allow use of Postgres logical replication.
\ No newline at end of file
diff --git a/synapse/storage/schema/common/delta/83/07_common_replica_identities.sql.postgres b/synapse/storage/schema/common/delta/83/07_common_replica_identities.sql.postgres
deleted file mode 100644
index 6bdd1f95690..00000000000
--- a/synapse/storage/schema/common/delta/83/07_common_replica_identities.sql.postgres
+++ /dev/null
@@ -1,30 +0,0 @@
-/* Copyright 2023 The Matrix.org Foundation C.I.C
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
--- Annotate some tables in Postgres with a REPLICA IDENTITY.
--- Any table that doesn't have a primary key should be annotated explicitly with
--- a REPLICA IDENTITY so that logical replication can be used.
--- If this is not done, then UPDATE and DELETE statements on those tables
--- will fail if logical replication is in use.
-
-
--- Re-use unique indices already defined on tables as a replica identity.
-ALTER TABLE applied_module_schemas REPLICA IDENTITY USING INDEX applied_module_schemas_module_name_file_key;
-ALTER TABLE applied_schema_deltas REPLICA IDENTITY USING INDEX applied_schema_deltas_version_file_key;
-ALTER TABLE background_updates REPLICA IDENTITY USING INDEX background_updates_uniqueness;
-ALTER TABLE schema_compat_version REPLICA IDENTITY USING INDEX schema_compat_version_lock_key;
-ALTER TABLE schema_version REPLICA IDENTITY USING INDEX schema_version_lock_key;
-
-
diff --git a/synapse/storage/schema/main/delta/83/06_more_replica_identities.sql.postgres b/synapse/storage/schema/main/delta/83/06_more_replica_identities.sql.postgres
deleted file mode 100644
index a592af814e0..00000000000
--- a/synapse/storage/schema/main/delta/83/06_more_replica_identities.sql.postgres
+++ /dev/null
@@ -1,80 +0,0 @@
-/* Copyright 2023 The Matrix.org Foundation C.I.C
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
--- Annotate some tables in Postgres with a REPLICA IDENTITY.
--- Any table that doesn't have a primary key should be annotated explicitly with
--- a REPLICA IDENTITY so that logical replication can be used.
--- If this is not done, then UPDATE and DELETE statements on those tables
--- will fail if logical replication is in use.
-
-
--- Where possible, re-use unique indices already defined on tables as a replica
--- identity.
-ALTER TABLE account_data REPLICA IDENTITY USING INDEX account_data_uniqueness;
-ALTER TABLE application_services_txns REPLICA IDENTITY USING INDEX application_services_txns_as_id_txn_id_key;
-ALTER TABLE appservice_stream_position REPLICA IDENTITY USING INDEX appservice_stream_position_lock_key;
-ALTER TABLE current_state_events REPLICA IDENTITY USING INDEX current_state_events_event_id_key;
-ALTER TABLE device_lists_changes_converted_stream_position REPLICA IDENTITY USING INDEX device_lists_changes_converted_stream_position_lock_key;
-ALTER TABLE devices REPLICA IDENTITY USING INDEX device_uniqueness;
-ALTER TABLE e2e_device_keys_json REPLICA IDENTITY USING INDEX e2e_device_keys_json_uniqueness;
-ALTER TABLE e2e_fallback_keys_json REPLICA IDENTITY USING INDEX e2e_fallback_keys_json_uniqueness;
-ALTER TABLE e2e_one_time_keys_json REPLICA IDENTITY USING INDEX e2e_one_time_keys_json_uniqueness;
-ALTER TABLE event_backward_extremities REPLICA IDENTITY USING INDEX event_backward_extremities_event_id_room_id_key;
-ALTER TABLE event_edges REPLICA IDENTITY USING INDEX event_edges_event_id_prev_event_id_idx;
-ALTER TABLE event_forward_extremities REPLICA IDENTITY USING INDEX event_forward_extremities_event_id_room_id_key;
-ALTER TABLE event_json REPLICA IDENTITY USING INDEX event_json_event_id_key;
-ALTER TABLE event_push_summary_last_receipt_stream_id REPLICA IDENTITY USING INDEX event_push_summary_last_receipt_stream_id_lock_key;
-ALTER TABLE event_push_summary_stream_ordering REPLICA IDENTITY USING INDEX event_push_summary_stream_ordering_lock_key;
-ALTER TABLE events REPLICA IDENTITY USING INDEX events_event_id_key;
-ALTER TABLE event_to_state_groups REPLICA IDENTITY USING INDEX event_to_state_groups_event_id_key;
-ALTER TABLE event_txn_id_device_id REPLICA IDENTITY USING INDEX event_txn_id_device_id_event_id;
-ALTER TABLE event_txn_id REPLICA IDENTITY USING INDEX event_txn_id_event_id;
-ALTER TABLE local_current_membership REPLICA IDENTITY USING INDEX local_current_membership_idx;
-ALTER TABLE partial_state_events REPLICA IDENTITY USING INDEX partial_state_events_event_id_key;
-ALTER TABLE partial_state_rooms_servers REPLICA IDENTITY USING INDEX partial_state_rooms_servers_room_id_server_name_key;
-ALTER TABLE profiles REPLICA IDENTITY USING INDEX profiles_user_id_key;
-ALTER TABLE redactions REPLICA IDENTITY USING INDEX redactions_event_id_key;
-ALTER TABLE registration_tokens REPLICA IDENTITY USING INDEX registration_tokens_token_key;
-ALTER TABLE rejections REPLICA IDENTITY USING INDEX rejections_event_id_key;
-ALTER TABLE room_account_data REPLICA IDENTITY USING INDEX room_account_data_uniqueness;
-ALTER TABLE room_aliases REPLICA IDENTITY USING INDEX room_aliases_room_alias_key;
-ALTER TABLE room_depth REPLICA IDENTITY USING INDEX room_depth_room_id_key;
-ALTER TABLE room_forgetter_stream_pos REPLICA IDENTITY USING INDEX room_forgetter_stream_pos_lock_key;
-ALTER TABLE room_memberships REPLICA IDENTITY USING INDEX room_memberships_event_id_key;
-ALTER TABLE room_tags REPLICA IDENTITY USING INDEX room_tag_uniqueness;
-ALTER TABLE room_tags_revisions REPLICA IDENTITY USING INDEX room_tag_revisions_uniqueness;
-ALTER TABLE server_keys_json REPLICA IDENTITY USING INDEX server_keys_json_uniqueness;
-ALTER TABLE sessions REPLICA IDENTITY USING INDEX sessions_session_type_session_id_key;
-ALTER TABLE state_events REPLICA IDENTITY USING INDEX state_events_event_id_key;
-ALTER TABLE stats_incremental_position REPLICA IDENTITY USING INDEX stats_incremental_position_lock_key;
-ALTER TABLE threads REPLICA IDENTITY USING INDEX threads_uniqueness;
-ALTER TABLE ui_auth_sessions_credentials REPLICA IDENTITY USING INDEX ui_auth_sessions_credentials_session_id_stage_type_key;
-ALTER TABLE ui_auth_sessions_ips REPLICA IDENTITY USING INDEX ui_auth_sessions_ips_session_id_ip_user_agent_key;
-ALTER TABLE ui_auth_sessions REPLICA IDENTITY USING INDEX ui_auth_sessions_session_id_key;
-ALTER TABLE user_directory_stream_pos REPLICA IDENTITY USING INDEX user_directory_stream_pos_lock_key;
-ALTER TABLE user_external_ids REPLICA IDENTITY USING INDEX user_external_ids_auth_provider_external_id_key;
-ALTER TABLE user_threepids REPLICA IDENTITY USING INDEX medium_address;
-ALTER TABLE worker_read_write_locks_mode REPLICA IDENTITY USING INDEX worker_read_write_locks_mode_key;
-ALTER TABLE worker_read_write_locks REPLICA IDENTITY USING INDEX worker_read_write_locks_key;
-
--- special cases: unique indices on nullable columns can't be used
-ALTER TABLE event_push_actions REPLICA IDENTITY FULL;
-ALTER TABLE local_media_repository REPLICA IDENTITY FULL;
-ALTER TABLE receipts_graph REPLICA IDENTITY FULL;
-ALTER TABLE receipts_linearized REPLICA IDENTITY FULL;
-ALTER TABLE received_transactions REPLICA IDENTITY FULL;
-ALTER TABLE remote_media_cache REPLICA IDENTITY FULL;
-ALTER TABLE server_signature_keys REPLICA IDENTITY FULL;
-ALTER TABLE users REPLICA IDENTITY FULL;

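For context on what this revert removes: in Postgres, a table's replica identity controls which old-row information is written to the WAL, and a table with no primary key and no explicit REPLICA IDENTITY cannot have its UPDATEs and DELETEs logically replicated, which is why the deleted migrations annotated such tables. A hedged sketch, not part of the patch, of how one might list each table's current setting from Python, assuming a psycopg2 connection string is available:

    import psycopg2

    def list_replica_identities(dsn: str) -> None:
        """Print each ordinary table's replica identity setting.

        relreplident is 'd' (default, i.e. primary key), 'n' (nothing),
        'f' (full) or 'i' (a specific unique index).
        """
        query = """
            SELECT c.relname, c.relreplident
            FROM pg_class c
            JOIN pg_namespace n ON n.oid = c.relnamespace
            WHERE n.nspname = 'public' AND c.relkind = 'r'
            ORDER BY c.relname
        """
        with psycopg2.connect(dsn) as conn, conn.cursor() as cur:
            cur.execute(query)
            for table, ident in cur.fetchall():
                print(f"{table}: {ident}")
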
From e34f406cfd1ded69fb5c7d66321631679dac7ff6 Mon Sep 17 00:00:00 2001
From: "H. Shay" <hillerys@element.io>
Date: Mon, 11 Dec 2023 20:06:34 -0800
Subject: [PATCH 230/278] block non-admins from publishing to room directory

---
 synapse/handlers/room.py         |  4 +++-
 synapse/rest/client/directory.py | 10 ++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 2823ca6f0d0..c391ab8f4a5 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -871,7 +871,9 @@ async def create_room(
 
         # The spec says rooms should default to private visibility if
         # `visibility` is not specified.
-        visibility = config.get("visibility", "private")
+        #visibility = config.get("visibility", "private")
+        # temporarily block publishing rooms to directory - patch date 12/12/23
+        visibility = "private"
         is_public = visibility == "public"
 
         self._validate_room_config(config, visibility)
diff --git a/synapse/rest/client/directory.py b/synapse/rest/client/directory.py
index 3534c3c259e..0d16758f856 100644
--- a/synapse/rest/client/directory.py
+++ b/synapse/rest/client/directory.py
@@ -159,6 +159,16 @@ async def on_PUT(
 
         content = parse_and_validate_json_object_from_request(request, self.PutBody)
 
+        # temporarily block publishing rooms to public directory for non-admins
+        # patch date 12/12/23
+        if content.visibility == "public":
+            is_admin = await self.is_server_admin(requester)
+            if not is_admin:
+                raise AuthError(
+                    403,
+                    "Publishing rooms to the room list is temporarily disabled.",
+                )
+
         await self.directory_handler.edit_published_room_list(
             requester, room_id, content.visibility
         )

From e21ffb894839742520c9546486bacf1b3a8f2a26 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 22 Dec 2023 11:25:15 +0000
Subject: [PATCH 231/278] Fix 'block non-admins from publishing to room
 directory'

---
 synapse/rest/client/directory.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/rest/client/directory.py b/synapse/rest/client/directory.py
index 0d16758f856..a4c9132e28f 100644
--- a/synapse/rest/client/directory.py
+++ b/synapse/rest/client/directory.py
@@ -162,7 +162,7 @@ async def on_PUT(
         # temporarily block publishing rooms to public directory for non-admins
         # patch date 12/12/23
         if content.visibility == "public":
-            is_admin = await self.is_server_admin(requester)
+            is_admin = await self.auth.is_server_admin(requester)
             if not is_admin:
                 raise AuthError(
                     403,

From 9302d20247c72fa322a51483f6842ec06ad1ac59 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 22 Jan 2024 15:39:41 +0000
Subject: [PATCH 232/278] Speed up e2e device keys queries for bot accounts

---
 .../storage/databases/main/end_to_end_keys.py | 29 ++++++++++++-------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py
index 4e3171ce6cb..96ec140bf4a 100644
--- a/synapse/storage/databases/main/end_to_end_keys.py
+++ b/synapse/storage/databases/main/end_to_end_keys.py
@@ -406,17 +406,24 @@ async def _get_e2e_device_keys(
         def get_e2e_device_keys_txn(
             txn: LoggingTransaction, query_clause: str, query_params: list
         ) -> None:
-            sql = (
-                "SELECT user_id, device_id, "
-                "    d.display_name, "
-                "    k.key_json"
-                " FROM devices d"
-                "    %s JOIN e2e_device_keys_json k USING (user_id, device_id)"
-                " WHERE %s AND NOT d.hidden"
-            ) % (
-                "LEFT" if include_all_devices else "INNER",
-                query_clause,
-            )
+            if include_all_devices:
+                sql = f"""
+                    SELECT user_id, device_id, d.display_name, k.key_json
+                    FROM devices d
+                    LEFT JOIN e2e_device_keys_json k USING (user_id, device_id)
+                    WHERE {query_clause} AND NOT d.hidden
+                """
+            else:
+                # We swap around `e2e_device_keys_json` and `devices`, as we
+                # want Postgres to query `e2e_device_keys_json` first as it will
+                # have fewer rows in it. This helps *a lot* with accounts with
+                # lots of non-e2e devices (such as bots).
+                sql = f"""
+                    SELECT user_id, device_id, d.display_name, k.key_json
+                    FROM e2e_device_keys_json k
+                    INNER JOIN devices d USING (user_id, device_id)
+                    WHERE {query_clause} AND NOT d.hidden
+                """
 
             txn.execute(sql, query_params)
 

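The comment in the diff carries the reasoning: when only devices with e2e keys are wanted, driving the join from `e2e_device_keys_json` keeps Postgres from walking every row of `devices` for accounts (such as bots) with thousands of non-e2e devices. A hedged sketch, not part of the patch, of how the two join orders could be compared by hand for a single user, assuming a DB-API cursor with access to the Synapse database; the single `user_id` filter is a simplification of the real `query_clause`:

    # Compare the planner's choices for the two ways of writing the join.
    QUERIES = {
        "devices_first": """
            EXPLAIN ANALYZE
            SELECT user_id, device_id, d.display_name, k.key_json
            FROM devices d
            INNER JOIN e2e_device_keys_json k USING (user_id, device_id)
            WHERE d.user_id = %s AND NOT d.hidden
        """,
        "keys_first": """
            EXPLAIN ANALYZE
            SELECT user_id, device_id, d.display_name, k.key_json
            FROM e2e_device_keys_json k
            INNER JOIN devices d USING (user_id, device_id)
            WHERE k.user_id = %s AND NOT d.hidden
        """,
    }

    def compare_plans(cur, user_id: str) -> None:
        for name, sql in QUERIES.items():
            cur.execute(sql, (user_id,))
            print(f"--- {name} ---")
            for (line,) in cur.fetchall():
                print(line)
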
From 2b78ad37b3c938ec6ecd01dc5e5c1d1db35a91bd Mon Sep 17 00:00:00 2001
From: Andrew Morgan <andrew@amorgan.xyz>
Date: Tue, 5 Mar 2024 11:51:18 +0000
Subject: [PATCH 233/278] Revert "Improve DB performance of calculating badge
 counts for push. (#16756)"

This reverts commit b11f7b5122061d4908b3328689486bc16dc58445.
---
 synapse/push/push_tools.py                    |   8 +-
 .../databases/main/event_push_actions.py      | 253 ++++++++----------
 2 files changed, 114 insertions(+), 147 deletions(-)

diff --git a/synapse/push/push_tools.py b/synapse/push/push_tools.py
index 76c7ab64773..1ef881f7024 100644
--- a/synapse/push/push_tools.py
+++ b/synapse/push/push_tools.py
@@ -29,11 +29,17 @@
 
 async def get_badge_count(store: DataStore, user_id: str, group_by_room: bool) -> int:
     invites = await store.get_invited_rooms_for_local_user(user_id)
+    joins = await store.get_rooms_for_user(user_id)
 
     badge = len(invites)
 
     room_to_count = await store.get_unread_counts_by_room_for_user(user_id)
-    for _room_id, notify_count in room_to_count.items():
+    for room_id, notify_count in room_to_count.items():
+        # room_to_count may include rooms which the user has left,
+        # ignore those.
+        if room_id not in joins:
+            continue
+
         if notify_count == 0:
             continue
 
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index d7aa8a0ee0b..3a5666cd9b0 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -358,6 +358,10 @@ async def get_unread_counts_by_room_for_user(self, user_id: str) -> Dict[str, in
         This function is intentionally not cached because it is called to calculate the
         unread badge for push notifications and thus the result is expected to change.
 
+        Note that this function assumes the user is a member of the room. Because
+        summary rows are not removed when a user leaves a room, the caller must
+        filter those rooms out of the result.
+
         Returns:
             A map of room ID to notification counts for the given user.
         """
@@ -370,170 +374,127 @@ async def get_unread_counts_by_room_for_user(self, user_id: str) -> Dict[str, in
     def _get_unread_counts_by_room_for_user_txn(
         self, txn: LoggingTransaction, user_id: str
     ) -> Dict[str, int]:
-        # To get the badge count of all rooms we need to make three queries:
-        #   1. Fetch all counts from `event_push_summary`, discarding any stale
-        #      rooms.
-        #   2. Fetch all notifications from `event_push_actions` that haven't
-        #      been rotated yet.
-        #   3. Fetch all notifications from `event_push_actions` for the stale
-        #      rooms.
-        #
-        # The "stale room" scenario generally happens when there is a new read
-        # receipt that hasn't yet been processed to update the
-        # `event_push_summary` table. When that happens we ignore the
-        # `event_push_summary` table for that room and calculate the count
-        # manually from `event_push_actions`.
-
-        # We need to only take into account read receipts of these types.
-        receipt_types_clause, receipt_types_args = make_in_list_sql_clause(
+        receipt_types_clause, args = make_in_list_sql_clause(
             self.database_engine,
             "receipt_type",
             (ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE),
         )
+        args.extend([user_id, user_id])
+
+        receipts_cte = f"""
+            WITH all_receipts AS (
+                SELECT room_id, thread_id, MAX(event_stream_ordering) AS max_receipt_stream_ordering
+                FROM receipts_linearized
+                LEFT JOIN events USING (room_id, event_id)
+                WHERE
+                    {receipt_types_clause}
+                    AND user_id = ?
+                GROUP BY room_id, thread_id
+            )
+        """
+
+        receipts_joins = """
+            LEFT JOIN (
+                SELECT room_id, thread_id,
+                max_receipt_stream_ordering AS threaded_receipt_stream_ordering
+                FROM all_receipts
+                WHERE thread_id IS NOT NULL
+            ) AS threaded_receipts USING (room_id, thread_id)
+            LEFT JOIN (
+                SELECT room_id, thread_id,
+                max_receipt_stream_ordering AS unthreaded_receipt_stream_ordering
+                FROM all_receipts
+                WHERE thread_id IS NULL
+            ) AS unthreaded_receipts USING (room_id)
+        """
+
+        # First get summary counts by room / thread for the user. We use the max receipt
+        # stream ordering of both threaded & unthreaded receipts to compare against the
+        # summary table.
+        #
+        # PostgreSQL and SQLite differ in comparing scalar numerics.
+        if isinstance(self.database_engine, PostgresEngine):
+            # GREATEST ignores NULLs.
+            max_clause = """GREATEST(
+                threaded_receipt_stream_ordering,
+                unthreaded_receipt_stream_ordering
+            )"""
+        else:
+            # MAX returns NULL if any are NULL, so COALESCE to 0 first.
+            max_clause = """MAX(
+                COALESCE(threaded_receipt_stream_ordering, 0),
+                COALESCE(unthreaded_receipt_stream_ordering, 0)
+            )"""
 
-        # Step 1, fetch all counts from `event_push_summary` for the user. This
-        # is slightly convoluted as we also need to pull out the stream ordering
-        # of the most recent receipt of the user in the room (either a thread
-        # aware receipt or thread unaware receipt) in order to determine
-        # whether the row in `event_push_summary` is stale. Hence the outer
-        # GROUP BY and odd join condition against `receipts_linearized`.
         sql = f"""
-            SELECT room_id, notif_count, stream_ordering, thread_id, last_receipt_stream_ordering,
-                MAX(receipt_stream_ordering)
-            FROM (
-                SELECT e.room_id, notif_count, e.stream_ordering, e.thread_id, last_receipt_stream_ordering,
-                    ev.stream_ordering AS receipt_stream_ordering
-                FROM event_push_summary AS e
-                INNER JOIN local_current_membership USING (user_id, room_id)
-                LEFT JOIN receipts_linearized AS r ON (
-                    e.user_id = r.user_id
-                    AND e.room_id = r.room_id
-                    AND (e.thread_id = r.thread_id OR r.thread_id IS NULL)
-                    AND {receipt_types_clause}
+            {receipts_cte}
+            SELECT eps.room_id, eps.thread_id, notif_count
+            FROM event_push_summary AS eps
+            {receipts_joins}
+            WHERE user_id = ?
+                AND notif_count != 0
+                AND (
+                    (last_receipt_stream_ordering IS NULL AND stream_ordering > {max_clause})
+                    OR last_receipt_stream_ordering = {max_clause}
                 )
-                LEFT JOIN events AS ev ON (r.event_id = ev.event_id)
-                WHERE e.user_id = ? and notif_count > 0
-            ) AS es
-            GROUP BY room_id, notif_count, stream_ordering, thread_id, last_receipt_stream_ordering
         """
+        txn.execute(sql, args)
 
-        txn.execute(
-            sql,
-            receipt_types_args
-            + [
-                user_id,
-            ],
-        )
-
+        seen_thread_ids = set()
         room_to_count: Dict[str, int] = defaultdict(int)
-        stale_room_ids = set()
-        for row in txn:
-            room_id = row[0]
-            notif_count = row[1]
-            stream_ordering = row[2]
-            _thread_id = row[3]
-            last_receipt_stream_ordering = row[4]
-            receipt_stream_ordering = row[5]
-
-            if last_receipt_stream_ordering is None:
-                if receipt_stream_ordering is None:
-                    room_to_count[room_id] += notif_count
-                elif stream_ordering > receipt_stream_ordering:
-                    room_to_count[room_id] += notif_count
-                else:
-                    # The latest read receipt from the user is after all the rows for
-                    # this room in `event_push_summary`. We ignore them, and
-                    # calculate the count from `event_push_actions` in step 3.
-                    pass
-            elif last_receipt_stream_ordering == receipt_stream_ordering:
-                room_to_count[room_id] += notif_count
-            else:
-                # The row is stale if `last_receipt_stream_ordering` is set and
-                # *doesn't* match the latest receipt from the user.
-                stale_room_ids.add(room_id)
 
-        # Discard any stale rooms from `room_to_count`, as we will recalculate
-        # them in step 3.
-        for room_id in stale_room_ids:
-            room_to_count.pop(room_id, None)
+        for room_id, thread_id, notif_count in txn:
+            room_to_count[room_id] += notif_count
+            seen_thread_ids.add(thread_id)
 
-        # Step 2, basically the same query, except against `event_push_actions`
-        # and only fetching rows inserted since the last rotation.
-        rotated_upto_stream_ordering = self.db_pool.simple_select_one_onecol_txn(
-            txn,
-            table="event_push_summary_stream_ordering",
-            keyvalues={},
-            retcol="stream_ordering",
+        # Now get any event push actions that haven't been rotated using the same OR
+        # join and filter by receipt and event push summary rotated up to stream ordering.
+        sql = f"""
+            {receipts_cte}
+            SELECT epa.room_id, epa.thread_id, COUNT(CASE WHEN epa.notif = 1 THEN 1 END) AS notif_count
+            FROM event_push_actions AS epa
+            {receipts_joins}
+            WHERE user_id = ?
+                AND epa.notif = 1
+                AND stream_ordering > (SELECT stream_ordering FROM event_push_summary_stream_ordering)
+                AND (threaded_receipt_stream_ordering IS NULL OR stream_ordering > threaded_receipt_stream_ordering)
+                AND (unthreaded_receipt_stream_ordering IS NULL OR stream_ordering > unthreaded_receipt_stream_ordering)
+            GROUP BY epa.room_id, epa.thread_id
+        """
+        txn.execute(sql, args)
+
+        for room_id, thread_id, notif_count in txn:
+            # Note: only count push actions we have valid summaries for with up to date receipt.
+            if thread_id not in seen_thread_ids:
+                continue
+            room_to_count[room_id] += notif_count
+
+        thread_id_clause, thread_ids_args = make_in_list_sql_clause(
+            self.database_engine, "epa.thread_id", seen_thread_ids
         )
 
+        # Finally re-check event_push_actions for any rooms not in the summary, ignoring
+        # the rotated up-to position. This handles the case where a read receipt has arrived
+        # but not been rotated meaning the summary table is out of date, so we go back to
+        # the push actions table.
         sql = f"""
-            SELECT room_id, thread_id
-            FROM (
-                SELECT e.room_id, e.stream_ordering, e.thread_id,
-                    ev.stream_ordering AS receipt_stream_ordering
-                FROM event_push_actions AS e
-                INNER JOIN local_current_membership USING (user_id, room_id)
-                LEFT JOIN receipts_linearized AS r ON (
-                    e.user_id = r.user_id
-                    AND e.room_id = r.room_id
-                    AND (e.thread_id = r.thread_id OR r.thread_id IS NULL)
-                    AND {receipt_types_clause}
-                )
-                LEFT JOIN events AS ev ON (r.event_id = ev.event_id)
-                WHERE e.user_id = ? and notif > 0
-                    AND e.stream_ordering > ?
-            ) AS es
-            GROUP BY room_id, stream_ordering, thread_id
-            HAVING stream_ordering > COALESCE(MAX(receipt_stream_ordering), 0)
+            {receipts_cte}
+            SELECT epa.room_id, COUNT(CASE WHEN epa.notif = 1 THEN 1 END) AS notif_count
+            FROM event_push_actions AS epa
+            {receipts_joins}
+            WHERE user_id = ?
+            AND NOT {thread_id_clause}
+            AND epa.notif = 1
+            AND (threaded_receipt_stream_ordering IS NULL OR stream_ordering > threaded_receipt_stream_ordering)
+            AND (unthreaded_receipt_stream_ordering IS NULL OR stream_ordering > unthreaded_receipt_stream_ordering)
+            GROUP BY epa.room_id
         """
 
-        txn.execute(
-            sql,
-            receipt_types_args + [user_id, rotated_upto_stream_ordering],
-        )
-        for room_id, _thread_id in txn:
-            # Again, we ignore any stale rooms.
-            if room_id not in stale_room_ids:
-                # For event push actions it is one notification per row.
-                room_to_count[room_id] += 1
-
-        # Step 3, if we have stale rooms then we need to recalculate the counts
-        # from `event_push_actions`. Again, this is basically the same query as
-        # above except without a lower bound on stream ordering and only against
-        # a specific set of rooms.
-        if stale_room_ids:
-            room_id_clause, room_id_args = make_in_list_sql_clause(
-                self.database_engine,
-                "e.room_id",
-                stale_room_ids,
-            )
+        args.extend(thread_ids_args)
+        txn.execute(sql, args)
 
-            sql = f"""
-                SELECT room_id, thread_id
-                FROM (
-                    SELECT e.room_id, e.stream_ordering, e.thread_id,
-                        ev.stream_ordering AS receipt_stream_ordering
-                    FROM event_push_actions AS e
-                    INNER JOIN local_current_membership USING (user_id, room_id)
-                    LEFT JOIN receipts_linearized AS r ON (
-                        e.user_id = r.user_id
-                        AND e.room_id = r.room_id
-                        AND (e.thread_id = r.thread_id OR r.thread_id IS NULL)
-                        AND {receipt_types_clause}
-                    )
-                    LEFT JOIN events AS ev ON (r.event_id = ev.event_id)
-                    WHERE e.user_id = ? and notif > 0
-                        AND {room_id_clause}
-                ) AS es
-                GROUP BY room_id, stream_ordering, thread_id
-                HAVING stream_ordering > COALESCE(MAX(receipt_stream_ordering), 0)
-            """
-            txn.execute(
-                sql,
-                receipt_types_args + [user_id] + room_id_args,
-            )
-            for room_id, _ in txn:
-                room_to_count[room_id] += 1
+        for room_id, notif_count in txn:
+            room_to_count[room_id] += notif_count
 
         return room_to_count
 

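With the optimisation reverted, the badge is again assembled in Python: invites each count once, and the per-room unread counts from the summary are added only for rooms the user is still joined to, since summary rows linger after a leave. A hedged sketch, not part of the patch, of that composition with simplified stand-in inputs:

    from typing import Dict, Iterable, Set

    def compose_badge_count(
        invited_room_ids: Iterable[str],
        joined_room_ids: Set[str],
        unread_counts_by_room: Dict[str, int],
        group_by_room: bool,
    ) -> int:
        # Every pending invite contributes one to the badge.
        badge = len(list(invited_room_ids))

        for room_id, notify_count in unread_counts_by_room.items():
            # Summary rows are not removed when the user leaves a room,
            # so skip rooms the user is no longer joined to.
            if room_id not in joined_room_ids:
                continue
            if notify_count == 0:
                continue

            if group_by_room:
                # Count each room with unread notifications once.
                badge += 1
            else:
                badge += notify_count

        return badge
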
From 656a007c701fec061e2e397051e16ce58905db92 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <andrew@amorgan.xyz>
Date: Mon, 15 Apr 2024 15:56:09 +0100
Subject: [PATCH 234/278] Revert "block non-admins from publishing to room
 directory"

This reverts commit e34f406cfd1ded69fb5c7d66321631679dac7ff6.
---
 synapse/handlers/room.py         |  4 +---
 synapse/rest/client/directory.py | 10 ----------
 2 files changed, 1 insertion(+), 13 deletions(-)

diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 6ad6a900548..5e81a51638a 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -888,9 +888,7 @@ async def create_room(
 
         # The spec says rooms should default to private visibility if
         # `visibility` is not specified.
-        #visibility = config.get("visibility", "private")
-        # temporarily block publishing rooms to directory - patch date 12/12/23
-        visibility = "private"
+        visibility = config.get("visibility", "private")
         is_public = visibility == "public"
 
         self._validate_room_config(config, visibility)
diff --git a/synapse/rest/client/directory.py b/synapse/rest/client/directory.py
index bfd4e1557d3..8099fdf3e47 100644
--- a/synapse/rest/client/directory.py
+++ b/synapse/rest/client/directory.py
@@ -166,16 +166,6 @@ async def on_PUT(
 
         content = parse_and_validate_json_object_from_request(request, self.PutBody)
 
-        # temporarily block publishing rooms to public directory for non-admins
-        # patch date 12/12/23
-        if content.visibility == "public":
-            is_admin = await self.auth.is_server_admin(requester)
-            if not is_admin:
-                raise AuthError(
-                    403,
-                    "Publishing rooms to the room list is temporarily disabled.",
-                )
-
         await self.directory_handler.edit_published_room_list(
             requester, room_id, content.visibility
         )

From 689666c4ba2702852fe71d2959d3e8c67047a0fd Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 17 May 2024 17:38:44 +0100
Subject: [PATCH 235/278] Reduce work of calculating outbound device pokes

---
 synapse/handlers/device.py                |  7 +++++++
 synapse/storage/databases/main/devices.py | 23 +++++++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 67953a3ed92..412bee2b765 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -892,6 +892,13 @@ async def _handle_new_device_update_async(self) -> None:
                         context=opentracing_context,
                     )
 
+                    await self.store.mark_redundant_device_lists_pokes(
+                        user_id=user_id,
+                        device_id=device_id,
+                        room_id=room_id,
+                        converted_upto_stream_id=stream_id,
+                    )
+
                     # Notify replication that we've updated the device list stream.
                     self.notifier.notify_replication()
 
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 8dbcb3f5a05..5bec9c9e5f5 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -2118,6 +2118,29 @@ def _add_device_outbound_poke_to_stream_txn(
                 },
             )
 
+    async def mark_redundant_device_lists_pokes(
+        self,
+        user_id: str,
+        device_id: str,
+        room_id: str,
+        converted_upto_stream_id: int,
+    ) -> None:
+        """If we've calculated the outbound pokes for a given room/device list
+        update, mark any subsequent changes as already converted"""
+
+        sql = """
+            UPDATE device_lists_changes_in_room
+            SET converted_to_destinations = true
+            WHERE stream_id > ? AND user_id = ? and device_id = ? AND room_id = ?;
+        """
+
+        def mark_redundant_device_lists_pokes_txn(txn: LoggingTransaction) -> None:
+            txn.execute(sql, (converted_upto_stream_id, user_id, device_id, room_id))
+
+        return await self.db_pool.runInteraction(
+            "mark_redundant_device_lists_pokes", mark_redundant_device_lists_pokes_txn
+        )
+
     def _add_device_outbound_room_poke_txn(
         self,
         txn: LoggingTransaction,

From db39ef061b53d99175b68e129b5de3d83a9f4c25 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 17 May 2024 17:39:14 +0100
Subject: [PATCH 236/278] Newsfile

---
 changelog.d/17211.misc | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/17211.misc

diff --git a/changelog.d/17211.misc b/changelog.d/17211.misc
new file mode 100644
index 00000000000..144db03a409
--- /dev/null
+++ b/changelog.d/17211.misc
@@ -0,0 +1 @@
+Reduce work of calculating outbound device lists updates.

From 1b7fa7b04a895b501072f16ca92c776840dcf090 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Sat, 18 May 2024 12:08:30 +0100
Subject: [PATCH 237/278] Add StreamToken.is_before_or_eq func

---
 synapse/notifier.py       |  3 +++
 synapse/types/__init__.py | 52 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/synapse/notifier.py b/synapse/notifier.py
index 7c1cd3b5f2f..06ce04c8003 100644
--- a/synapse/notifier.py
+++ b/synapse/notifier.py
@@ -763,6 +763,9 @@ async def check_for_updates(
 
         return result
 
+    async def wait_for_stream_position(self, stream_token: StreamToken) -> None:
+        pass
+
     async def _get_room_ids(
         self, user: UserID, explicit_room_id: Optional[str]
     ) -> Tuple[StrCollection, bool]:
diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py
index 509a2d3a0f9..8ba35eef0de 100644
--- a/synapse/types/__init__.py
+++ b/synapse/types/__init__.py
@@ -48,7 +48,7 @@
 from immutabledict import immutabledict
 from signedjson.key import decode_verify_key_bytes
 from signedjson.types import VerifyKey
-from typing_extensions import TypedDict
+from typing_extensions import Self, TypedDict
 from unpaddedbase64 import decode_base64
 from zope.interface import Interface
 
@@ -515,6 +515,27 @@ def get_stream_pos_for_instance(self, instance_name: str) -> int:
         # at `self.stream`.
         return self.instance_map.get(instance_name, self.stream)
 
+    def is_before_or_eq(self, other_token: Self) -> bool:
+        """Wether this token is before the other token, i.e. every constituent
+        part is before the other.
+
+        Essentially it is `self <= other`.
+
+        Note: if `self.is_before_or_eq(other_token) is False` then that does not
+        imply that the reverse is True.
+        """
+        if self.stream > other_token.stream:
+            return False
+
+        instances = self.instance_map.keys() | other_token.instance_map.keys()
+        for instance in instances:
+            if self.instance_map.get(
+                instance, self.stream
+            ) > other_token.instance_map.get(instance, other_token.stream):
+                return False
+
+        return True
+
 
 @attr.s(frozen=True, slots=True, order=False)
 class RoomStreamToken(AbstractMultiWriterStreamToken):
@@ -1008,6 +1029,35 @@ def get_field(
         """Returns the stream ID for the given key."""
         return getattr(self, key.value)
 
+    def is_before_or_eq(self, other_token: "StreamToken") -> bool:
+        """Wether this token is before the other token, i.e. every constituent
+        part is before the other.
+
+        Essentially it is `self <= other`.
+
+        Note: if `self.is_before_or_eq(other_token) is False` then that does not
+        imply that the reverse is True.
+        """
+
+        for _, key in StreamKeyType.__members__.items():
+            self_value = self.get_field(key)
+            other_value = other_token.get_field(key)
+
+            if isinstance(self_value, RoomStreamToken):
+                assert isinstance(other_value, RoomStreamToken)
+                if not self_value.is_before_or_eq(other_value):
+                    return False
+            elif isinstance(self_value, MultiWriterStreamToken):
+                assert isinstance(other_value, MultiWriterStreamToken)
+                if not self_value.is_before_or_eq(other_value):
+                    return False
+            else:
+                assert isinstance(other_value, int)
+                if self_value > other_value:
+                    return False
+
+        return True
+
 
 StreamToken.START = StreamToken(
     RoomStreamToken(stream=0), 0, 0, MultiWriterStreamToken(stream=0), 0, 0, 0, 0, 0, 0

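`is_before_or_eq` defines a partial order, not a total one: with multiple writers, each of two tokens can be ahead of the other on a different instance, in which case neither direction of the comparison holds, which is what the "does not imply that the reverse is True" note is getting at. A hedged illustration, not part of the patch, using a simplified stand-in class rather than the real token types:

    from dataclasses import dataclass, field
    from typing import Dict

    @dataclass(frozen=True)
    class Token:
        """Simplified multi-writer token: a default stream position plus
        per-instance positions for writers that are ahead of it."""
        stream: int
        instance_map: Dict[str, int] = field(default_factory=dict)

        def pos(self, instance: str) -> int:
            return self.instance_map.get(instance, self.stream)

        def is_before_or_eq(self, other: "Token") -> bool:
            if self.stream > other.stream:
                return False
            instances = self.instance_map.keys() | other.instance_map.keys()
            return all(self.pos(i) <= other.pos(i) for i in instances)

    # worker "a" is ahead in t1, worker "b" is ahead in t2:
    t1 = Token(stream=5, instance_map={"a": 9})
    t2 = Token(stream=5, instance_map={"b": 9})
    assert not t1.is_before_or_eq(t2)  # t1 is ahead on "a"
    assert not t2.is_before_or_eq(t1)  # t2 is ahead on "b"
    assert Token(stream=5).is_before_or_eq(t1)  # a plain token is behind both
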
From 169c9f85a8036c14d3dce61fa227c0de210f8e5e Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Sat, 18 May 2024 12:22:09 +0100
Subject: [PATCH 238/278] In sync wait for worker to catch up since token

Otherwise an overloaded worker may compute the response from a stale stream
position and send duplicate events down sync.
---
 synapse/handlers/sync.py | 35 +++++++++++++++++++++++++++++++++++
 synapse/notifier.py      | 20 ++++++++++++++++++--
 2 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index d3d40e86821..37d5890c652 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -279,6 +279,23 @@ def __bool__(self) -> bool:
             or self.device_lists
         )
 
+    @staticmethod
+    def empty(next_batch: StreamToken) -> "SyncResult":
+        "Return a new empty result"
+        return SyncResult(
+            next_batch=next_batch,
+            presence=[],
+            account_data=[],
+            joined=[],
+            invited=[],
+            knocked=[],
+            archived=[],
+            to_device=[],
+            device_lists=DeviceListUpdates(),
+            device_one_time_keys_count={},
+            device_unused_fallback_key_types=[],
+        )
+
 
 class SyncHandler:
     def __init__(self, hs: "HomeServer"):
@@ -401,6 +418,24 @@ async def _wait_for_sync_for_user(
         if context:
             context.tag = sync_label
 
+        if since_token is not None:
+            # We need to make sure this worker has caught up with the token. If
+            # this returns false it means we timed out waiting, and we should
+            # just return an empty response.
+            start = self.clock.time_msec()
+            if not await self.notifier.wait_for_stream_token(since_token):
+                logger.warning(
+                    "Timed out waiting for worker to catch up. Returning empty response"
+                )
+                return SyncResult.empty(since_token)
+
+            # If we've spent significant time waiting to catch up, take it off
+            # the timeout.
+            now = self.clock.time_msec()
+            if now - start > 1_000:
+                timeout -= now - start
+                timeout = max(timeout, 0)
+
         # if we have a since token, delete any to-device messages before that token
         # (since we now know that the device has received them)
         if since_token is not None:
diff --git a/synapse/notifier.py b/synapse/notifier.py
index 06ce04c8003..459954caeb1 100644
--- a/synapse/notifier.py
+++ b/synapse/notifier.py
@@ -763,8 +763,24 @@ async def check_for_updates(
 
         return result
 
-    async def wait_for_stream_position(self, stream_token: StreamToken) -> None:
-        pass
+    async def wait_for_stream_token(self, stream_token: StreamToken) -> bool:
+        """Wait for this worker to catch up with the given stream token."""
+
+        start = self.clock.time_msec()
+        while True:
+            current_token = self.event_sources.get_current_token()
+            if stream_token.is_before_or_eq(current_token):
+                return True
+
+            now = self.clock.time_msec()
+
+            if now - start > 10_000:
+                return False
+
+            logger.info("Waiting for current token to reach %s", stream_token)
+
+            # TODO: be better
+            await self.clock.sleep(0.5)
 
     async def _get_room_ids(
         self, user: UserID, explicit_room_id: Optional[str]

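Two details of this patch are easy to miss: the catch-up wait is bounded (the notifier polls roughly every 0.5s and gives up after about 10s), and any significant time spent waiting is deducted from the client's long-poll timeout so the overall request does not overrun. A hedged sketch, not part of the patch, of that bookkeeping in isolation; `caught_up` is a hypothetical stand-in for "has this worker reached the client's since token yet?":

    import asyncio
    import time
    from typing import Callable, Optional

    async def wait_then_shrink_timeout(
        caught_up: Callable[[], bool],
        timeout_ms: int,
        wait_cap_ms: int = 10_000,
    ) -> Optional[int]:
        """Poll until caught_up() is True or wait_cap_ms elapses.

        Returns the remaining sync timeout, or None if we gave up waiting
        (the caller should then return an empty sync response).
        """
        start = time.monotonic()
        while not caught_up():
            if (time.monotonic() - start) * 1000 > wait_cap_ms:
                return None
            await asyncio.sleep(0.5)

        waited_ms = int((time.monotonic() - start) * 1000)
        if waited_ms > 1_000:
            # Take the catch-up time off the long-poll timeout, as the patch does.
            timeout_ms = max(timeout_ms - waited_ms, 0)
        return timeout_ms
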
From bd6b57653f9517f6cdce1b3e05321ec7780d0aaf Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Sat, 18 May 2024 12:23:54 +0100
Subject: [PATCH 239/278] Newsfile

---
 changelog.d/17215.bugfix | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/17215.bugfix

diff --git a/changelog.d/17215.bugfix b/changelog.d/17215.bugfix
new file mode 100644
index 00000000000..10981b798e0
--- /dev/null
+++ b/changelog.d/17215.bugfix
@@ -0,0 +1 @@
+Fix bug where duplicate events could be sent down sync when using workers that are overloaded.

From bec0313e1b48ab1840b4e220c8de7484016c819d Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@element.io>
Date: Tue, 14 May 2024 14:39:04 +0100
Subject: [PATCH 240/278] Improve perf of sync device lists (#17191)

It's almost always more efficient to query the rooms that have device
list changes, rather than looking at the list of all users whose devices
have changed and then look for shared rooms.
---
 changelog.d/17191.misc                    |  1 +
 synapse/handlers/sync.py                  | 37 ++++-------------------
 synapse/storage/databases/main/devices.py | 17 ++---------
 3 files changed, 9 insertions(+), 46 deletions(-)
 create mode 100644 changelog.d/17191.misc

diff --git a/changelog.d/17191.misc b/changelog.d/17191.misc
new file mode 100644
index 00000000000..bd55eeaa33c
--- /dev/null
+++ b/changelog.d/17191.misc
@@ -0,0 +1 @@
+Improve performance of calculating device lists changes in `/sync`.
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index d3d40e86821..fdaaa9006c8 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -1886,38 +1886,13 @@ async def _generate_sync_entry_for_device_list(
 
         # Step 1a, check for changes in devices of users we share a room
         # with
-        #
-        # We do this in two different ways depending on what we have cached.
-        # If we already have a list of all the user that have changed since
-        # the last sync then it's likely more efficient to compare the rooms
-        # they're in with the rooms the syncing user is in.
-        #
-        # If we don't have that info cached then we get all the users that
-        # share a room with our user and check if those users have changed.
-        cache_result = self.store.get_cached_device_list_changes(
-            since_token.device_list_key
-        )
-        if cache_result.hit:
-            changed_users = cache_result.entities
-
-            result = await self.store.get_rooms_for_users(changed_users)
-
-            for changed_user_id, entries in result.items():
-                # Check if the changed user shares any rooms with the user,
-                # or if the changed user is the syncing user (as we always
-                # want to include device list updates of their own devices).
-                if user_id == changed_user_id or any(
-                    rid in joined_room_ids for rid in entries
-                ):
-                    users_that_have_changed.add(changed_user_id)
-        else:
-            users_that_have_changed = (
-                await self._device_handler.get_device_changes_in_shared_rooms(
-                    user_id,
-                    sync_result_builder.joined_room_ids,
-                    from_token=since_token,
-                )
+        users_that_have_changed = (
+            await self._device_handler.get_device_changes_in_shared_rooms(
+                user_id,
+                sync_result_builder.joined_room_ids,
+                from_token=since_token,
             )
+        )
 
         # Step 1b, check for newly joined rooms
         for room_id in newly_joined_rooms:
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 8dbcb3f5a05..d98f0593bce 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -70,10 +70,7 @@
 from synapse.util import json_decoder, json_encoder
 from synapse.util.caches.descriptors import cached, cachedList
 from synapse.util.caches.lrucache import LruCache
-from synapse.util.caches.stream_change_cache import (
-    AllEntitiesChangedResult,
-    StreamChangeCache,
-)
+from synapse.util.caches.stream_change_cache import StreamChangeCache
 from synapse.util.cancellation import cancellable
 from synapse.util.iterutils import batch_iter
 from synapse.util.stringutils import shortstr
@@ -832,16 +829,6 @@ async def get_cached_devices_for_user(
         )
         return {device[0]: db_to_json(device[1]) for device in devices}
 
-    def get_cached_device_list_changes(
-        self,
-        from_key: int,
-    ) -> AllEntitiesChangedResult:
-        """Get set of users whose devices have changed since `from_key`, or None
-        if that information is not in our cache.
-        """
-
-        return self._device_list_stream_cache.get_all_entities_changed(from_key)
-
     @cancellable
     async def get_all_devices_changed(
         self,
@@ -1475,7 +1462,7 @@ async def get_device_list_changes_in_rooms(
 
         sql = """
             SELECT DISTINCT user_id FROM device_lists_changes_in_room
-            WHERE {clause} AND stream_id >= ?
+            WHERE {clause} AND stream_id > ?
         """
 
         def _get_device_list_changes_in_rooms_txn(

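The commit message contrasts the two shapes of step 1a: the removed path starts from every user whose devices changed and intersects their rooms with the syncing user's rooms, while the kept path asks directly which users changed devices inside the rooms the syncing user is joined to. A hedged sketch, not part of the patch, with hypothetical store helpers standing in for the real queries:

    from typing import Set

    async def changed_users_old(store, user_id: str, joined: Set[str],
                                since_key: int) -> Set[str]:
        # Old shape: all changed users first, then intersect rooms in Python.
        # (Both store methods here are illustrative stand-ins.)
        changed_users = await store.users_with_device_changes_since(since_key)
        rooms_by_user = await store.get_rooms_for_users(changed_users)
        return {
            changed_id
            for changed_id, their_rooms in rooms_by_user.items()
            if changed_id == user_id or joined.intersection(their_rooms)
        }

    async def changed_users_new(store, user_id: str, joined: Set[str],
                                since_key: int) -> Set[str]:
        # New shape: a single query scoped to the rooms we are actually in.
        return await store.device_list_changes_in_rooms_since(joined, since_key)
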
From 5b2b3120c283a90be78ee5970c0c5c67c049e482 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 15 May 2024 15:11:50 +0100
Subject: [PATCH 241/278] Cap the top stream ID when fetching changed devices

---
 synapse/handlers/device.py                | 22 ++++++++++++++++++----
 synapse/handlers/sync.py                  |  1 +
 synapse/storage/databases/main/devices.py |  5 +++--
 3 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 67953a3ed92..55842e7c7b1 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -159,20 +159,32 @@ async def get_device(self, user_id: str, device_id: str) -> JsonDict:
 
     @cancellable
     async def get_device_changes_in_shared_rooms(
-        self, user_id: str, room_ids: StrCollection, from_token: StreamToken
+        self,
+        user_id: str,
+        room_ids: StrCollection,
+        from_token: StreamToken,
+        now_token: Optional[StreamToken] = None,
     ) -> Set[str]:
         """Get the set of users whose devices have changed who share a room with
         the given user.
         """
+        now_device_lists_key = self.store.get_device_stream_token()
+        if now_token:
+            now_device_lists_key = now_token.device_list_key
+
         changed_users = await self.store.get_device_list_changes_in_rooms(
-            room_ids, from_token.device_list_key
+            room_ids,
+            from_token.device_list_key,
+            now_device_lists_key,
         )
 
         if changed_users is not None:
             # We also check if the given user has changed their device. If
             # they're in no rooms then the above query won't include them.
             changed = await self.store.get_users_whose_devices_changed(
-                from_token.device_list_key, [user_id]
+                from_token.device_list_key,
+                [user_id],
+                to_key=now_device_lists_key,
             )
             changed_users.update(changed)
             return changed_users
@@ -190,7 +202,9 @@ async def get_device_changes_in_shared_rooms(
         tracked_users.add(user_id)
 
         changed = await self.store.get_users_whose_devices_changed(
-            from_token.device_list_key, tracked_users
+            from_token.device_list_key,
+            tracked_users,
+            to_key=now_device_lists_key,
         )
 
         return changed
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index fdaaa9006c8..b7917a99d6d 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -1891,6 +1891,7 @@ async def _generate_sync_entry_for_device_list(
                 user_id,
                 sync_result_builder.joined_room_ids,
                 from_token=since_token,
+                now_token=sync_result_builder.now_token,
             )
         )
 
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index d98f0593bce..6bfbb18f318 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -1444,7 +1444,7 @@ async def _get_min_device_lists_changes_in_room(self) -> int:
 
     @cancellable
     async def get_device_list_changes_in_rooms(
-        self, room_ids: Collection[str], from_id: int
+        self, room_ids: Collection[str], from_id: int, to_id: int
     ) -> Optional[Set[str]]:
         """Return the set of users whose devices have changed in the given rooms
         since the given stream ID.
@@ -1462,7 +1462,7 @@ async def get_device_list_changes_in_rooms(
 
         sql = """
             SELECT DISTINCT user_id FROM device_lists_changes_in_room
-            WHERE {clause} AND stream_id > ?
+            WHERE {clause} AND stream_id > ? AND stream_id <= ?
         """
 
         def _get_device_list_changes_in_rooms_txn(
@@ -1479,6 +1479,7 @@ def _get_device_list_changes_in_rooms_txn(
                 self.database_engine, "room_id", chunk
             )
             args.append(from_id)
+            args.append(to_id)
 
             changes |= await self.db_pool.runInteraction(
                 "get_device_list_changes_in_rooms",

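Bounding the query at both ends matters because `/sync` builds its response against a fixed `now_token`: without the upper cap, a device-list change written while the response is being assembled could be reported now and then reported again by the next incremental sync, whose `since` is this response's `now_token`. A hedged, self-contained illustration, not part of the patch, of why the half-open window `(from, to]` used in the patched SQL keeps successive syncs disjoint:

    # Device-list changes in a room, keyed by stream id.
    changes = {8: "@alice:hs", 9: "@bob:hs", 10: "@carol:hs"}

    def changed_between(from_id: int, to_id: int) -> set:
        # Half-open window, as in the patched SQL:
        #   WHERE ... stream_id > ? AND stream_id <= ?
        return {user for sid, user in changes.items() if from_id < sid <= to_id}

    # A sync built at now_token=9 while stream id 10 is written concurrently,
    # followed by the next incremental sync from that token:
    first = changed_between(from_id=7, to_id=9)    # {"@alice:hs", "@bob:hs"}
    second = changed_between(from_id=9, to_id=10)  # {"@carol:hs"}
    assert first & second == set()                 # nothing reported twice
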
From cf474a094fea7c410e01cd3ae7ac562e6c7188bf Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 16 May 2024 10:09:41 +0100
Subject: [PATCH 242/278] Add stream change cache for device lists in room

---
 synapse/replication/tcp/client.py         | 14 +++--
 synapse/storage/databases/main/devices.py | 69 +++++++++++++++++++++--
 2 files changed, 72 insertions(+), 11 deletions(-)

diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index 5e5387fdcb7..cff88a87ec7 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -112,6 +112,14 @@ async def on_rdata(
             token: stream token for this batch of rows
             rows: a list of Stream.ROW_TYPE objects as returned by Stream.parse_row.
         """
+        all_room_ids: Set[str] = set()
+        if stream_name == DeviceListsStream.NAME:
+            prev_token = self.store.get_device_stream_token()
+            all_room_ids = await self.store.get_all_device_list_changes(
+                prev_token, token
+            )
+            self.store.device_lists_in_rooms_have_changed(all_room_ids, token)
+
         self.store.process_replication_rows(stream_name, instance_name, token, rows)
         # NOTE: this must be called after process_replication_rows to ensure any
         # cache invalidations are first handled before any stream ID advances.
@@ -146,12 +154,6 @@ async def on_rdata(
                     StreamKeyType.TO_DEVICE, token, users=entities
                 )
         elif stream_name == DeviceListsStream.NAME:
-            all_room_ids: Set[str] = set()
-            for row in rows:
-                if row.entity.startswith("@") and not row.is_signature:
-                    room_ids = await self.store.get_rooms_for_user(row.entity)
-                    all_room_ids.update(room_ids)
-
             # `all_room_ids` can be large, so let's wake up those streams in batches
             for batched_room_ids in batch_iter(all_room_ids, 100):
                 self.notifier.on_new_event(
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 6bfbb18f318..f4410b5c02e 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -129,6 +129,20 @@ def __init__(
             prefilled_cache=device_list_prefill,
         )
 
+        device_list_room_prefill, min_device_list_room_id = self.db_pool.get_cache_dict(
+            db_conn,
+            "device_lists_changes_in_room",
+            entity_column="room_id",
+            stream_column="stream_id",
+            max_value=device_list_max,
+            limit=10000,
+        )
+        self._device_list_room_stream_cache = StreamChangeCache(
+            "DeviceListRoomStreamChangeCache",
+            min_device_list_room_id,
+            prefilled_cache=device_list_room_prefill,
+        )
+
         (
             user_signature_stream_prefill,
             user_signature_stream_list_id,
@@ -206,6 +220,13 @@ def _invalidate_caches_for_devices(
                     row.entity, token
                 )
 
+    def device_lists_in_rooms_have_changed(
+        self, room_ids: StrCollection, token: int
+    ) -> None:
+        "Record that device lists have changed in rooms"
+        for room_id in room_ids:
+            self._device_list_room_stream_cache.entity_has_changed(room_id, token)
+
     def get_device_stream_token(self) -> int:
         return self._device_list_id_gen.get_current_token()
 
@@ -1460,6 +1481,12 @@ async def get_device_list_changes_in_rooms(
         if min_stream_id > from_id:
             return None
 
+        changed_room_ids = self._device_list_room_stream_cache.get_entities_changed(
+            room_ids, from_id
+        )
+        if not changed_room_ids:
+            return set()
+
         sql = """
             SELECT DISTINCT user_id FROM device_lists_changes_in_room
             WHERE {clause} AND stream_id > ? AND stream_id <= ?
@@ -1474,7 +1501,7 @@ def _get_device_list_changes_in_rooms_txn(
             return {user_id for user_id, in txn}
 
         changes = set()
-        for chunk in batch_iter(room_ids, 1000):
+        for chunk in batch_iter(changed_room_ids, 1000):
             clause, args = make_in_list_sql_clause(
                 self.database_engine, "room_id", chunk
             )
@@ -1490,6 +1517,34 @@ def _get_device_list_changes_in_rooms_txn(
 
         return changes
 
+    async def get_all_device_list_changes(self, from_id: int, to_id: int) -> Set[str]:
+        """Return the set of rooms where devices have changed since the given
+        stream ID.
+
+        Will raise an exception if the given stream ID is too old.
+        """
+
+        min_stream_id = await self._get_min_device_lists_changes_in_room()
+
+        if min_stream_id > from_id:
+            raise Exception("stream ID is too old")
+
+        sql = """
+            SELECT DISTINCT room_id FROM device_lists_changes_in_room
+            WHERE stream_id > ? AND stream_id <= ?
+        """
+
+        def _get_all_device_list_changes_txn(
+            txn: LoggingTransaction,
+        ) -> Set[str]:
+            txn.execute(sql, (from_id, to_id))
+            return {room_id for room_id, in txn}
+
+        return await self.db_pool.runInteraction(
+            "get_all_device_list_changes",
+            _get_all_device_list_changes_txn,
+        )
+
     async def get_device_list_changes_in_room(
         self, room_id: str, min_stream_id: int
     ) -> Collection[Tuple[str, str]]:
@@ -1950,8 +2005,8 @@ def _update_remote_device_list_cache_txn(
     async def add_device_change_to_streams(
         self,
         user_id: str,
-        device_ids: Collection[str],
-        room_ids: Collection[str],
+        device_ids: StrCollection,
+        room_ids: StrCollection,
     ) -> Optional[int]:
         """Persist that a user's devices have been updated, and which hosts
         (if any) should be poked.
@@ -2110,8 +2165,8 @@ def _add_device_outbound_room_poke_txn(
         self,
         txn: LoggingTransaction,
         user_id: str,
-        device_ids: Iterable[str],
-        room_ids: Collection[str],
+        device_ids: StrCollection,
+        room_ids: StrCollection,
         stream_ids: List[int],
         context: Dict[str, str],
     ) -> None:
@@ -2149,6 +2204,10 @@ def _add_device_outbound_room_poke_txn(
             ],
         )
 
+        txn.call_after(
+            self.device_lists_in_rooms_have_changed, room_ids, max(stream_ids)
+        )
+
     async def get_uncoverted_outbound_room_pokes(
         self, start_stream_id: int, start_room_id: str, limit: int = 10
     ) -> List[Tuple[str, str, str, int, Optional[Dict[str, str]]]]:
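
Note: the patch above pairs a prefilled stream-change cache with the per-room SQL query, so `get_device_list_changes_in_rooms` only has to hit the database for rooms that may have changed since `from_id`. The following is a deliberately simplified, illustrative model of that idea (not Synapse's actual `StreamChangeCache`, and the room IDs are placeholders): it remembers the latest stream position at which each entity changed, plus the earliest position it knows about, and answers conservatively when asked about older positions.

    from typing import Dict, Iterable, Set

    class SimpleStreamChangeCache:
        """Toy stream-change cache: entity -> last stream position it changed at.
        Positions older than `earliest_known` cannot be answered precisely, so
        we conservatively report everything as possibly changed."""

        def __init__(self, earliest_known: int) -> None:
            self._earliest_known = earliest_known
            self._last_changed: Dict[str, int] = {}

        def entity_has_changed(self, entity: str, stream_pos: int) -> None:
            # Record that `entity` changed at `stream_pos`.
            if stream_pos > self._last_changed.get(entity, 0):
                self._last_changed[entity] = stream_pos

        def get_entities_changed(self, entities: Iterable[str], stream_pos: int) -> Set[str]:
            entities = set(entities)
            if stream_pos < self._earliest_known:
                # Too old to answer precisely: cannot rule anything out.
                return entities
            return {e for e in entities if self._last_changed.get(e, 0) > stream_pos}

    if __name__ == "__main__":
        cache = SimpleStreamChangeCache(earliest_known=100)
        cache.entity_has_changed("!room1:example.org", 105)
        # Only !room1 would need a database query for changes after position 100.
        print(cache.get_entities_changed({"!room1:example.org", "!room2:example.org"}, 100))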

From e6d3d808aa68d47fc52207e8f33a6d931259df01 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Sat, 18 May 2024 12:31:21 +0100
Subject: [PATCH 243/278] Newsfile

---
 changelog.d/{17191.misc => 17216.misc} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename changelog.d/{17191.misc => 17216.misc} (100%)

diff --git a/changelog.d/17191.misc b/changelog.d/17216.misc
similarity index 100%
rename from changelog.d/17191.misc
rename to changelog.d/17216.misc

From a2dc84fc90bc554947f87b72bd2c5efc2b896285 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Sat, 18 May 2024 12:40:04 +0100
Subject: [PATCH 244/278] Bump typing_extensions version

---
 pyproject.toml | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index dd4521ff718..a7d43b17444 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -200,10 +200,8 @@ netaddr = ">=0.7.18"
 # add a lower bound to the Jinja2 dependency.
 Jinja2 = ">=3.0"
 bleach = ">=1.4.3"
-# We use `ParamSpec` and `Concatenate`, which were added in `typing-extensions` 3.10.0.0.
-# Additionally we need https://github.com/python/typing/pull/817 to allow types to be
-# generic over ParamSpecs.
-typing-extensions = ">=3.10.0.1"
+# We use `Self`, which was added in `typing-extensions` 4.0.
+typing-extensions = ">=4.0"
 # We enforce that we have a `cryptography` version that bundles an `openssl`
 # with the latest security patches.
 cryptography = ">=3.4.7"
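
Note: `Self` is the reason for the new lower bound; it lets a method declare that it returns an instance of whichever subclass it was called on, and it first appeared in `typing-extensions` 4.0. A minimal illustration (the classes below are hypothetical, purely to show the annotation):

    from typing import List

    from typing_extensions import Self

    class Builder:
        def __init__(self) -> None:
            self.parts: List[str] = []

        def add(self, part: str) -> Self:
            # Returning Self (rather than "Builder") preserves the precise
            # type when the method is called on a subclass.
            self.parts.append(part)
            return self

    class FancyBuilder(Builder):
        def shine(self) -> Self:
            self.parts.append("*sparkle*")
            return self

    # Chaining on the subclass stays typed as FancyBuilder thanks to Self.
    b = FancyBuilder().add("base").shine()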

From 7bd1575ddf56ff4356e4c971fbf1b12483130d84 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Sat, 18 May 2024 13:08:52 +0100
Subject: [PATCH 245/278] Ignore typing stream for now

---
 synapse/types/__init__.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py
index 8ba35eef0de..151658df534 100644
--- a/synapse/types/__init__.py
+++ b/synapse/types/__init__.py
@@ -1040,6 +1040,12 @@ def is_before_or_eq(self, other_token: "StreamToken") -> bool:
         """
 
         for _, key in StreamKeyType.__members__.items():
+            if key == StreamKeyType.TYPING:
+                # Typing stream is allowed to "reset", and so comparisons don't
+                # really make sense as is.
+                # TODO: Figure out a better way of tracking resets.
+                continue
+
             self_value = self.get_field(key)
             other_value = other_token.get_field(key)
 

From 1e4d9df3cd38dcc6f49f789fcb39d0b5df79db2a Mon Sep 17 00:00:00 2001
From: Olivier 'reivilibre <oliverw@matrix.org>
Date: Sat, 18 May 2024 15:21:40 +0100
Subject: [PATCH 246/278] Add current token to log line

---
 synapse/notifier.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/synapse/notifier.py b/synapse/notifier.py
index 459954caeb1..ced9e9ad667 100644
--- a/synapse/notifier.py
+++ b/synapse/notifier.py
@@ -777,7 +777,11 @@ async def wait_for_stream_token(self, stream_token: StreamToken) -> bool:
             if now - start > 10_000:
                 return False
 
-            logger.info("Waiting for current token to reach %s", stream_token)
+            logger.info(
+                "Waiting for current token to reach %s; currently at %s",
+                stream_token,
+                current_token,
+            )
 
             # TODO: be better
             await self.clock.sleep(0.5)

From 6a0d2dc6fc48ae51b3a62d7eca730a66774746af Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Sat, 18 May 2024 16:12:46 +0100
Subject: [PATCH 247/278] Only check for all rooms if not outbound poke

---
 synapse/replication/tcp/client.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index cff88a87ec7..2d6d49eed73 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -114,11 +114,12 @@ async def on_rdata(
         """
         all_room_ids: Set[str] = set()
         if stream_name == DeviceListsStream.NAME:
-            prev_token = self.store.get_device_stream_token()
-            all_room_ids = await self.store.get_all_device_list_changes(
-                prev_token, token
-            )
-            self.store.device_lists_in_rooms_have_changed(all_room_ids, token)
+            if any(row.entity.startswith("@") and not row.is_signature for row in rows):
+                prev_token = self.store.get_device_stream_token()
+                all_room_ids = await self.store.get_all_device_list_changes(
+                    prev_token, token
+                )
+                self.store.device_lists_in_rooms_have_changed(all_room_ids, token)
 
         self.store.process_replication_rows(stream_name, instance_name, token, rows)
         # NOTE: this must be called after process_replication_rows to ensure any

From 5831342024c0f4f3f8972a48515f575557439120 Mon Sep 17 00:00:00 2001
From: Olivier 'reivilibre <oliverw@matrix.org>
Date: Sat, 18 May 2024 20:41:38 +0100
Subject: [PATCH 248/278] Log task usage upon finish and every 5 minutes
 otherwise

---
 synapse/util/task_scheduler.py | 68 +++++++++++++++++++++++++++++++++-
 1 file changed, 66 insertions(+), 2 deletions(-)

diff --git a/synapse/util/task_scheduler.py b/synapse/util/task_scheduler.py
index 01d05c9ed60..2350d4b5ca2 100644
--- a/synapse/util/task_scheduler.py
+++ b/synapse/util/task_scheduler.py
@@ -24,7 +24,12 @@
 
 from twisted.python.failure import Failure
 
-from synapse.logging.context import nested_logging_context
+from synapse.logging.context import (
+    ContextResourceUsage,
+    LoggingContext,
+    nested_logging_context,
+    set_current_context,
+)
 from synapse.metrics import LaterGauge
 from synapse.metrics.background_process_metrics import (
     run_as_background_process,
@@ -81,6 +86,8 @@ class TaskScheduler:
     MAX_CONCURRENT_RUNNING_TASKS = 5
     # Time from the last task update after which we will log a warning
     LAST_UPDATE_BEFORE_WARNING_MS = 24 * 60 * 60 * 1000  # 24hrs
+    # Report a running task's status and usage every so often.
+    OCCASIONAL_REPORT_INTERVAL_MS = 5 * 60 * 1000  # 5 minutes
 
     def __init__(self, hs: "HomeServer"):
         self._hs = hs
@@ -346,6 +353,32 @@ async def _clean_scheduled_tasks(self) -> None:
             assert task.id not in self._running_tasks
             await self._store.delete_scheduled_task(task.id)
 
+    @staticmethod
+    def _log_task_usage(
+        state: str, task: ScheduledTask, usage: ContextResourceUsage, active_time: float
+    ) -> None:
+        """
+        Log a line describing the state and usage of a task.
+        The log line is inspired by / a copy of the request log line format,
+        but with irrelevant fields removed.
+
+        active_time: Time that the task has been running for, in seconds.
+        """
+
+        logger.info(
+            "Task %s: %.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)"
+            " [%d dbevts] %r",
+            state,
+            active_time,
+            usage.ru_utime,
+            usage.ru_stime,
+            usage.db_sched_duration_sec,
+            usage.db_txn_duration_sec,
+            int(usage.db_txn_count),
+            usage.evt_db_fetch_count,
+            task.params,
+        )
+
     async def _launch_task(self, task: ScheduledTask) -> None:
         """Launch a scheduled task now.
 
@@ -360,8 +393,32 @@ async def _launch_task(self, task: ScheduledTask) -> None:
             )
         function = self._actions[task.action]
 
+        def _occasional_report(
+            task_log_context: LoggingContext, start_time: int
+        ) -> None:
+            """
+            Helper to log a 'Task continuing' line every so often.
+            """
+
+            current_time = int(self._clock.time())
+            calling_context = set_current_context(task_log_context)
+            try:
+                usage = task_log_context.get_resource_usage()
+                TaskScheduler._log_task_usage(
+                    "continuing", task, usage, (current_time - start_time) * 0.001
+                )
+            finally:
+                set_current_context(calling_context)
+
         async def wrapper() -> None:
-            with nested_logging_context(task.id):
+            with nested_logging_context(task.id) as log_context:
+                start_time = int(self._clock.time())
+                occasional_status_call = self._clock.looping_call(
+                    _occasional_report,
+                    TaskScheduler.OCCASIONAL_REPORT_INTERVAL_MS,
+                    log_context,
+                    start_time,
+                )
                 try:
                     (status, result, error) = await function(task)
                 except Exception:
@@ -383,6 +440,13 @@ async def wrapper() -> None:
                 )
                 self._running_tasks.remove(task.id)
 
+                current_time = int(self._clock.time())
+                usage = log_context.get_resource_usage()
+                TaskScheduler._log_task_usage(
+                    status.value, task, usage, (current_time - start_time) * 0.001
+                )
+                occasional_status_call.stop()
+
             # Try launch a new task since we've finished with this one.
             self._clock.call_later(0.1, self._launch_scheduled_tasks)
 

From d9003ae50744cec56e8dc0910d6a734709edb8df Mon Sep 17 00:00:00 2001
From: Olivier 'reivilibre <oliverw@matrix.org>
Date: Sat, 18 May 2024 20:44:13 +0100
Subject: [PATCH 249/278] Newsfile

Signed-off-by: Olivier 'reivilibre <oliverw@matrix.org>
---
 changelog.d/17219.feature | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/17219.feature

diff --git a/changelog.d/17219.feature b/changelog.d/17219.feature
new file mode 100644
index 00000000000..f8277a89d85
--- /dev/null
+++ b/changelog.d/17219.feature
@@ -0,0 +1 @@
+Add logging to tasks managed by the task scheduler, showing CPU and database usage.
\ No newline at end of file

From d7b8d87dadb7a1a6cd80b9b93d7fcef931f928ac Mon Sep 17 00:00:00 2001
From: Olivier 'reivilibre <oliverw@matrix.org>
Date: Sat, 18 May 2024 20:41:38 +0100
Subject: [PATCH 250/278] Log task usage upon finish and every 5 minutes
 otherwise

---
 synapse/util/task_scheduler.py | 69 +++++++++++++++++++++++++++++++++-
 1 file changed, 67 insertions(+), 2 deletions(-)

diff --git a/synapse/util/task_scheduler.py b/synapse/util/task_scheduler.py
index 01d05c9ed60..31922b1ce1e 100644
--- a/synapse/util/task_scheduler.py
+++ b/synapse/util/task_scheduler.py
@@ -24,7 +24,12 @@
 
 from twisted.python.failure import Failure
 
-from synapse.logging.context import nested_logging_context
+from synapse.logging.context import (
+    ContextResourceUsage,
+    LoggingContext,
+    nested_logging_context,
+    set_current_context,
+)
 from synapse.metrics import LaterGauge
 from synapse.metrics.background_process_metrics import (
     run_as_background_process,
@@ -81,6 +86,8 @@ class TaskScheduler:
     MAX_CONCURRENT_RUNNING_TASKS = 5
     # Time from the last task update after which we will log a warning
     LAST_UPDATE_BEFORE_WARNING_MS = 24 * 60 * 60 * 1000  # 24hrs
+    # Report a running task's status and usage every so often.
+    OCCASIONAL_REPORT_INTERVAL_MS = 5 * 60 * 1000  # 5 minutes
 
     def __init__(self, hs: "HomeServer"):
         self._hs = hs
@@ -346,6 +353,33 @@ async def _clean_scheduled_tasks(self) -> None:
             assert task.id not in self._running_tasks
             await self._store.delete_scheduled_task(task.id)
 
+    @staticmethod
+    def _log_task_usage(
+        state: str, task: ScheduledTask, usage: ContextResourceUsage, active_time: float
+    ) -> None:
+        """
+        Log a line describing the state and usage of a task.
+        The log line is inspired by / a copy of the request log line format,
+        but with irrelevant fields removed.
+
+        active_time: Time that the task has been running for, in seconds.
+        """
+
+        logger.info(
+            "Task %s: %.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)"
+            " [%d dbevts] %r, %r",
+            state,
+            active_time,
+            usage.ru_utime,
+            usage.ru_stime,
+            usage.db_sched_duration_sec,
+            usage.db_txn_duration_sec,
+            int(usage.db_txn_count),
+            usage.evt_db_fetch_count,
+            task.resource_id,
+            task.params,
+        )
+
     async def _launch_task(self, task: ScheduledTask) -> None:
         """Launch a scheduled task now.
 
@@ -360,8 +394,32 @@ async def _launch_task(self, task: ScheduledTask) -> None:
             )
         function = self._actions[task.action]
 
+        def _occasional_report(
+            task_log_context: LoggingContext, start_time: int
+        ) -> None:
+            """
+            Helper to log a 'Task continuing' line every so often.
+            """
+
+            current_time = int(self._clock.time())
+            calling_context = set_current_context(task_log_context)
+            try:
+                usage = task_log_context.get_resource_usage()
+                TaskScheduler._log_task_usage(
+                    "continuing", task, usage, (current_time - start_time) * 0.001
+                )
+            finally:
+                set_current_context(calling_context)
+
         async def wrapper() -> None:
-            with nested_logging_context(task.id):
+            with nested_logging_context(task.id) as log_context:
+                start_time = int(self._clock.time())
+                occasional_status_call = self._clock.looping_call(
+                    _occasional_report,
+                    TaskScheduler.OCCASIONAL_REPORT_INTERVAL_MS,
+                    log_context,
+                    start_time,
+                )
                 try:
                     (status, result, error) = await function(task)
                 except Exception:
@@ -383,6 +441,13 @@ async def wrapper() -> None:
                 )
                 self._running_tasks.remove(task.id)
 
+                current_time = int(self._clock.time())
+                usage = log_context.get_resource_usage()
+                TaskScheduler._log_task_usage(
+                    status.value, task, usage, (current_time - start_time) * 0.001
+                )
+                occasional_status_call.stop()
+
             # Try launch a new task since we've finished with this one.
             self._clock.call_later(0.1, self._launch_scheduled_tasks)
 

From 3958fdab777c2e929acb13c90797240557fda699 Mon Sep 17 00:00:00 2001
From: Olivier 'reivilibre <oliverw@matrix.org>
Date: Sat, 18 May 2024 20:44:13 +0100
Subject: [PATCH 251/278] Newsfile

Signed-off-by: Olivier 'reivilibre <oliverw@matrix.org>
---
 changelog.d/17219.feature | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/17219.feature

diff --git a/changelog.d/17219.feature b/changelog.d/17219.feature
new file mode 100644
index 00000000000..f8277a89d85
--- /dev/null
+++ b/changelog.d/17219.feature
@@ -0,0 +1 @@
+Add logging to tasks managed by the task scheduler, showing CPU and database usage.
\ No newline at end of file

From effebb3e88eaa30987a32d8d9a2331023fc5f12e Mon Sep 17 00:00:00 2001
From: Olivier 'reivilibre <oliverw@matrix.org>
Date: Sat, 18 May 2024 21:11:34 +0100
Subject: [PATCH 252/278] Oops! `Clock.time` gives seconds

---
 synapse/util/task_scheduler.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/synapse/util/task_scheduler.py b/synapse/util/task_scheduler.py
index 31922b1ce1e..448960b2978 100644
--- a/synapse/util/task_scheduler.py
+++ b/synapse/util/task_scheduler.py
@@ -395,25 +395,25 @@ async def _launch_task(self, task: ScheduledTask) -> None:
         function = self._actions[task.action]
 
         def _occasional_report(
-            task_log_context: LoggingContext, start_time: int
+            task_log_context: LoggingContext, start_time: float
         ) -> None:
             """
             Helper to log a 'Task continuing' line every so often.
             """
 
-            current_time = int(self._clock.time())
+            current_time = self._clock.time()
             calling_context = set_current_context(task_log_context)
             try:
                 usage = task_log_context.get_resource_usage()
                 TaskScheduler._log_task_usage(
-                    "continuing", task, usage, (current_time - start_time) * 0.001
+                    "continuing", task, usage, current_time - start_time
                 )
             finally:
                 set_current_context(calling_context)
 
         async def wrapper() -> None:
             with nested_logging_context(task.id) as log_context:
-                start_time = int(self._clock.time())
+                start_time = self._clock.time()
                 occasional_status_call = self._clock.looping_call(
                     _occasional_report,
                     TaskScheduler.OCCASIONAL_REPORT_INTERVAL_MS,
@@ -441,10 +441,10 @@ async def wrapper() -> None:
                 )
                 self._running_tasks.remove(task.id)
 
-                current_time = int(self._clock.time())
+                current_time = self._clock.time()
                 usage = log_context.get_resource_usage()
                 TaskScheduler._log_task_usage(
-                    status.value, task, usage, (current_time - start_time) * 0.001
+                    status.value, task, usage, current_time - start_time
                 )
                 occasional_status_call.stop()
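
Note: taken together, the task-scheduler patches above boil down to one shape: record a wall-clock start time when the task launches, have a periodic callback log resource usage so far, and log a final line when the task finishes, keeping durations in seconds throughout (the earlier `* 0.001` scaling wrongly assumed milliseconds). A standalone sketch of that shape using plain asyncio and stdlib logging instead of Synapse's Clock and logging contexts:

    import asyncio
    import logging
    import time

    logger = logging.getLogger(__name__)
    REPORT_INTERVAL_SECONDS = 300  # report a long-running task every 5 minutes

    async def run_with_usage_reports(name: str, coro) -> None:
        start = time.monotonic()  # seconds, like Clock.time()

        async def occasional_report() -> None:
            while True:
                await asyncio.sleep(REPORT_INTERVAL_SECONDS)
                logger.info(
                    "Task %s continuing: %.3fsec elapsed", name, time.monotonic() - start
                )

        reporter = asyncio.create_task(occasional_report())
        try:
            await coro
        finally:
            reporter.cancel()  # stop the periodic report once the task ends
            logger.info(
                "Task %s finished: %.3fsec elapsed", name, time.monotonic() - start
            )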
 

From e4550d4c64c11e2c3a83d469c4d06132cbd61c5d Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 29 May 2024 13:55:45 +0100
Subject: [PATCH 253/278] Fix missing PostgresEngine import

---
 synapse/storage/databases/main/events_worker.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index d074306bf00..a1caf0d3e76 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -2279,9 +2279,8 @@ async def get_event_id_for_timestamp(
         """
 
         def get_event_id_for_timestamp_txn(txn: LoggingTransaction) -> Optional[str]:
-            if isinstance(self.database_engine, PostgresEngine):
-                # Temporary: make sure these queries can't last more than 30s
-                txn.execute("SET LOCAL statement_timeout = 30000")
+            # Temporary: make sure these queries can't last more than 30s
+            txn.execute("SET LOCAL statement_timeout = 30000")
 
             txn.execute(
                 sql_template,
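
Note: the `isinstance(self.database_engine, PostgresEngine)` guard referenced a name that is not imported in this module on this branch, so the commit simply drops the guard; `SET LOCAL statement_timeout` is PostgreSQL syntax and only applies for the rest of the current transaction, so the code now assumes a Postgres backend. A hedged sketch of the pattern (the connection, table and query below are illustrative only, not Synapse's actual schema access):

    def get_event_id_near_timestamp(conn, ts_ms: int):
        """Run a potentially heavy query with a transaction-local 30s timeout.

        `conn` is assumed to be a DB-API connection to PostgreSQL (e.g. psycopg2).
        """
        with conn:  # one transaction; committed or rolled back on exit
            with conn.cursor() as cur:
                # SET LOCAL only affects the current transaction, so the timeout
                # cannot leak into unrelated queries on the same connection.
                cur.execute("SET LOCAL statement_timeout = 30000")  # milliseconds
                cur.execute(
                    "SELECT event_id FROM events"
                    " WHERE origin_server_ts >= %s"
                    " ORDER BY origin_server_ts LIMIT 1",
                    (ts_ms,),
                )
                row = cur.fetchone()
                return row[0] if row else None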

From 04dc1a7afbd46a2bf7ccfa60d729dbeea666f180 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 6 Jun 2024 15:13:21 +0100
Subject: [PATCH 254/278] Always return OTK counts

---
 synapse/handlers/sync.py | 32 ++++++++++++++++++++++++++++----
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 1d7d9dfdd0f..2d1291b9f80 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -285,7 +285,11 @@ def __bool__(self) -> bool:
         )
 
     @staticmethod
-    def empty(next_batch: StreamToken) -> "SyncResult":
+    def empty(
+        next_batch: StreamToken,
+        device_one_time_keys_count: JsonMapping,
+        device_unused_fallback_key_types: List[str],
+    ) -> "SyncResult":
         "Return a new empty result"
         return SyncResult(
             next_batch=next_batch,
@@ -297,8 +301,8 @@ def empty(next_batch: StreamToken) -> "SyncResult":
             archived=[],
             to_device=[],
             device_lists=DeviceListUpdates(),
-            device_one_time_keys_count={},
-            device_unused_fallback_key_types=[],
+            device_one_time_keys_count=device_one_time_keys_count,
+            device_unused_fallback_key_types=device_unused_fallback_key_types,
         )
 
 
@@ -523,7 +527,27 @@ async def _wait_for_sync_for_user(
                 logger.warning(
                     "Timed out waiting for worker to catch up. Returning empty response"
                 )
-                return SyncResult.empty(since_token)
+                device_id = sync_config.device_id
+                one_time_keys_count: JsonMapping = {}
+                unused_fallback_key_types: List[str] = []
+                if device_id:
+                    user_id = sync_config.user.to_string()
+                    # TODO: We should have a way to let clients differentiate between the states of:
+                    #   * no change in OTK count since the provided since token
+                    #   * the server has zero OTKs left for this device
+                    #  Spec issue: https://github.com/matrix-org/matrix-doc/issues/3298
+                    one_time_keys_count = await self.store.count_e2e_one_time_keys(
+                        user_id, device_id
+                    )
+                    unused_fallback_key_types = list(
+                        await self.store.get_e2e_unused_fallback_key_types(
+                            user_id, device_id
+                        )
+                    )
+
+                return SyncResult.empty(
+                    since_token, one_time_keys_count, unused_fallback_key_types
+                )
 
             # If we've spent significant time waiting to catch up, take it off
             # the timeout.

From b03e8cdaec38e8210d907eb7061d1bb094b02f3d Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 6 Jun 2024 15:46:00 +0100
Subject: [PATCH 255/278] Don't cache

---
 synapse/handlers/sync.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 2d1291b9f80..b8587f45459 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -545,6 +545,7 @@ async def _wait_for_sync_for_user(
                         )
                     )
 
+                cache_context.should_cache = False
                 return SyncResult.empty(
                     since_token, one_time_keys_count, unused_fallback_key_types
                 )

From 52f47d12a9ab62fa41d8426e71cbe06e65b1c762 Mon Sep 17 00:00:00 2001
From: Travis Ralston <travisr@element.io>
Date: Wed, 12 Jun 2024 04:27:46 -0600
Subject: [PATCH 256/278] Add report room API (MSC4151) (#17270)

https://github.com/matrix-org/matrix-spec-proposals/pull/4151

This is intended to be enabled by default for immediate use. When FCP is
complete, the unstable endpoint will be dropped and stable endpoint
supported instead - no backwards compatibility is expected for the
unstable endpoint.
---
 changelog.d/17270.feature                     |  1 +
 synapse/config/experimental.py                |  3 +
 synapse/rest/__init__.py                      |  4 +-
 .../client/{report_event.py => reporting.py}  | 57 +++++++++++-
 synapse/rest/client/versions.py               |  2 +
 synapse/storage/databases/main/room.py        | 32 +++++++
 .../main/delta/85/06_add_room_reports.sql     | 20 ++++
 tests/rest/admin/test_event_reports.py        |  6 +-
 ...test_report_event.py => test_reporting.py} | 93 ++++++++++++++++++-
 9 files changed, 210 insertions(+), 8 deletions(-)
 create mode 100644 changelog.d/17270.feature
 rename synapse/rest/client/{report_event.py => reporting.py} (65%)
 create mode 100644 synapse/storage/schema/main/delta/85/06_add_room_reports.sql
 rename tests/rest/client/{test_report_event.py => test_reporting.py} (64%)

diff --git a/changelog.d/17270.feature b/changelog.d/17270.feature
new file mode 100644
index 00000000000..4ea5e7be850
--- /dev/null
+++ b/changelog.d/17270.feature
@@ -0,0 +1 @@
+Add support for the unstable [MSC4151](https://github.com/matrix-org/matrix-spec-proposals/pull/4151) report room API.
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 75fe6d7b247..5fe5b951dd4 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -443,3 +443,6 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None:
         self.msc3916_authenticated_media_enabled = experimental.get(
             "msc3916_authenticated_media_enabled", False
         )
+
+        # MSC4151: Report room API (Client-Server API)
+        self.msc4151_enabled: bool = experimental.get("msc4151_enabled", False)
diff --git a/synapse/rest/__init__.py b/synapse/rest/__init__.py
index 534dc0e2762..0024ccf7080 100644
--- a/synapse/rest/__init__.py
+++ b/synapse/rest/__init__.py
@@ -53,7 +53,7 @@
     register,
     relations,
     rendezvous,
-    report_event,
+    reporting,
     room,
     room_keys,
     room_upgrade_rest_servlet,
@@ -128,7 +128,7 @@ def register_servlets(client_resource: HttpServer, hs: "HomeServer") -> None:
         tags.register_servlets(hs, client_resource)
         account_data.register_servlets(hs, client_resource)
         if is_main_process:
-            report_event.register_servlets(hs, client_resource)
+            reporting.register_servlets(hs, client_resource)
             openid.register_servlets(hs, client_resource)
         notifications.register_servlets(hs, client_resource)
         devices.register_servlets(hs, client_resource)
diff --git a/synapse/rest/client/report_event.py b/synapse/rest/client/reporting.py
similarity index 65%
rename from synapse/rest/client/report_event.py
rename to synapse/rest/client/reporting.py
index 447281931eb..a95b83b14d9 100644
--- a/synapse/rest/client/report_event.py
+++ b/synapse/rest/client/reporting.py
@@ -23,17 +23,28 @@
 from http import HTTPStatus
 from typing import TYPE_CHECKING, Tuple
 
+from synapse._pydantic_compat import HAS_PYDANTIC_V2
 from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError
 from synapse.http.server import HttpServer
-from synapse.http.servlet import RestServlet, parse_json_object_from_request
+from synapse.http.servlet import (
+    RestServlet,
+    parse_and_validate_json_object_from_request,
+    parse_json_object_from_request,
+)
 from synapse.http.site import SynapseRequest
 from synapse.types import JsonDict
+from synapse.types.rest import RequestBodyModel
 
 from ._base import client_patterns
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
 
+if TYPE_CHECKING or HAS_PYDANTIC_V2:
+    from pydantic.v1 import StrictStr
+else:
+    from pydantic import StrictStr
+
 logger = logging.getLogger(__name__)
 
 
@@ -95,5 +106,49 @@ async def on_POST(
         return 200, {}
 
 
+class ReportRoomRestServlet(RestServlet):
+    # https://github.com/matrix-org/matrix-spec-proposals/pull/4151
+    PATTERNS = client_patterns(
+        "/org.matrix.msc4151/rooms/(?P<room_id>[^/]*)/report$",
+        releases=[],
+        v1=False,
+        unstable=True,
+    )
+
+    def __init__(self, hs: "HomeServer"):
+        super().__init__()
+        self.hs = hs
+        self.auth = hs.get_auth()
+        self.clock = hs.get_clock()
+        self.store = hs.get_datastores().main
+
+    class PostBody(RequestBodyModel):
+        reason: StrictStr
+
+    async def on_POST(
+        self, request: SynapseRequest, room_id: str
+    ) -> Tuple[int, JsonDict]:
+        requester = await self.auth.get_user_by_req(request)
+        user_id = requester.user.to_string()
+
+        body = parse_and_validate_json_object_from_request(request, self.PostBody)
+
+        room = await self.store.get_room(room_id)
+        if room is None:
+            raise NotFoundError("Room does not exist")
+
+        await self.store.add_room_report(
+            room_id=room_id,
+            user_id=user_id,
+            reason=body.reason,
+            received_ts=self.clock.time_msec(),
+        )
+
+        return 200, {}
+
+
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     ReportEventRestServlet(hs).register(http_server)
+
+    if hs.config.experimental.msc4151_enabled:
+        ReportRoomRestServlet(hs).register(http_server)
diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py
index 56de6906d00..f4281581393 100644
--- a/synapse/rest/client/versions.py
+++ b/synapse/rest/client/versions.py
@@ -149,6 +149,8 @@ def on_GET(self, request: Request) -> Tuple[int, JsonDict]:
                             is not None
                         )
                     ),
+                    # MSC4151: Report room API (Client-Server API)
+                    "org.matrix.msc4151": self.config.experimental.msc4151_enabled,
                 },
             },
         )
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index 616c941687a..b8a71c803e7 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -2207,6 +2207,7 @@ def __init__(
         super().__init__(database, db_conn, hs)
 
         self._event_reports_id_gen = IdGenerator(db_conn, "event_reports", "id")
+        self._room_reports_id_gen = IdGenerator(db_conn, "room_reports", "id")
 
         self._instance_name = hs.get_instance_name()
 
@@ -2416,6 +2417,37 @@ async def add_event_report(
         )
         return next_id
 
+    async def add_room_report(
+        self,
+        room_id: str,
+        user_id: str,
+        reason: str,
+        received_ts: int,
+    ) -> int:
+        """Add a room report
+
+        Args:
+            room_id: The room ID being reported.
+            user_id: User who reports the room.
+            reason: Description that the user specifies.
+            received_ts: Time when the user submitted the report (milliseconds).
+        Returns:
+            Id of the room report.
+        """
+        next_id = self._room_reports_id_gen.get_next()
+        await self.db_pool.simple_insert(
+            table="room_reports",
+            values={
+                "id": next_id,
+                "received_ts": received_ts,
+                "room_id": room_id,
+                "user_id": user_id,
+                "reason": reason,
+            },
+            desc="add_room_report",
+        )
+        return next_id
+
     async def block_room(self, room_id: str, user_id: str) -> None:
         """Marks the room as blocked.
 
diff --git a/synapse/storage/schema/main/delta/85/06_add_room_reports.sql b/synapse/storage/schema/main/delta/85/06_add_room_reports.sql
new file mode 100644
index 00000000000..f7b45276cff
--- /dev/null
+++ b/synapse/storage/schema/main/delta/85/06_add_room_reports.sql
@@ -0,0 +1,20 @@
+--
+-- This file is licensed under the Affero General Public License (AGPL) version 3.
+--
+-- Copyright (C) 2024 New Vector, Ltd
+--
+-- This program is free software: you can redistribute it and/or modify
+-- it under the terms of the GNU Affero General Public License as
+-- published by the Free Software Foundation, either version 3 of the
+-- License, or (at your option) any later version.
+--
+-- See the GNU Affero General Public License for more details:
+-- <https://www.gnu.org/licenses/agpl-3.0.html>.
+
+CREATE TABLE room_reports (
+    id BIGINT NOT NULL PRIMARY KEY,
+    received_ts BIGINT NOT NULL,
+    room_id TEXT NOT NULL,
+    user_id TEXT NOT NULL,
+    reason TEXT NOT NULL
+);
diff --git a/tests/rest/admin/test_event_reports.py b/tests/rest/admin/test_event_reports.py
index a0f978911af..feb410a11d6 100644
--- a/tests/rest/admin/test_event_reports.py
+++ b/tests/rest/admin/test_event_reports.py
@@ -24,7 +24,7 @@
 
 import synapse.rest.admin
 from synapse.api.errors import Codes
-from synapse.rest.client import login, report_event, room
+from synapse.rest.client import login, reporting, room
 from synapse.server import HomeServer
 from synapse.types import JsonDict
 from synapse.util import Clock
@@ -37,7 +37,7 @@ class EventReportsTestCase(unittest.HomeserverTestCase):
         synapse.rest.admin.register_servlets,
         login.register_servlets,
         room.register_servlets,
-        report_event.register_servlets,
+        reporting.register_servlets,
     ]
 
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
@@ -453,7 +453,7 @@ class EventReportDetailTestCase(unittest.HomeserverTestCase):
         synapse.rest.admin.register_servlets,
         login.register_servlets,
         room.register_servlets,
-        report_event.register_servlets,
+        reporting.register_servlets,
     ]
 
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
diff --git a/tests/rest/client/test_report_event.py b/tests/rest/client/test_reporting.py
similarity index 64%
rename from tests/rest/client/test_report_event.py
rename to tests/rest/client/test_reporting.py
index 5903771e52f..009deb9cb05 100644
--- a/tests/rest/client/test_report_event.py
+++ b/tests/rest/client/test_reporting.py
@@ -22,7 +22,7 @@
 from twisted.test.proto_helpers import MemoryReactor
 
 import synapse.rest.admin
-from synapse.rest.client import login, report_event, room
+from synapse.rest.client import login, reporting, room
 from synapse.server import HomeServer
 from synapse.types import JsonDict
 from synapse.util import Clock
@@ -35,7 +35,7 @@ class ReportEventTestCase(unittest.HomeserverTestCase):
         synapse.rest.admin.register_servlets,
         login.register_servlets,
         room.register_servlets,
-        report_event.register_servlets,
+        reporting.register_servlets,
     ]
 
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
@@ -139,3 +139,92 @@ def _assert_status(self, response_status: int, data: JsonDict) -> None:
             "POST", self.report_path, data, access_token=self.other_user_tok
         )
         self.assertEqual(response_status, channel.code, msg=channel.result["body"])
+
+
+class ReportRoomTestCase(unittest.HomeserverTestCase):
+    servlets = [
+        synapse.rest.admin.register_servlets,
+        login.register_servlets,
+        room.register_servlets,
+        reporting.register_servlets,
+    ]
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.other_user = self.register_user("user", "pass")
+        self.other_user_tok = self.login("user", "pass")
+
+        self.room_id = self.helper.create_room_as(
+            self.other_user, tok=self.other_user_tok, is_public=True
+        )
+        self.report_path = (
+            f"/_matrix/client/unstable/org.matrix.msc4151/rooms/{self.room_id}/report"
+        )
+
+    @unittest.override_config(
+        {
+            "experimental_features": {"msc4151_enabled": True},
+        }
+    )
+    def test_reason_str(self) -> None:
+        data = {"reason": "this makes me sad"}
+        self._assert_status(200, data)
+
+    @unittest.override_config(
+        {
+            "experimental_features": {"msc4151_enabled": True},
+        }
+    )
+    def test_no_reason(self) -> None:
+        data = {"not_reason": "for typechecking"}
+        self._assert_status(400, data)
+
+    @unittest.override_config(
+        {
+            "experimental_features": {"msc4151_enabled": True},
+        }
+    )
+    def test_reason_nonstring(self) -> None:
+        data = {"reason": 42}
+        self._assert_status(400, data)
+
+    @unittest.override_config(
+        {
+            "experimental_features": {"msc4151_enabled": True},
+        }
+    )
+    def test_reason_null(self) -> None:
+        data = {"reason": None}
+        self._assert_status(400, data)
+
+    @unittest.override_config(
+        {
+            "experimental_features": {"msc4151_enabled": True},
+        }
+    )
+    def test_cannot_report_nonexistent_room(self) -> None:
+        """
+        Tests that we don't accept room reports for rooms which do not exist.
+        """
+        channel = self.make_request(
+            "POST",
+            "/_matrix/client/unstable/org.matrix.msc4151/rooms/!bloop:example.org/report",
+            {"reason": "i am very sad"},
+            access_token=self.other_user_tok,
+            shorthand=False,
+        )
+        self.assertEqual(404, channel.code, msg=channel.result["body"])
+        self.assertEqual(
+            "Room does not exist",
+            channel.json_body["error"],
+            msg=channel.result["body"],
+        )
+
+    def _assert_status(self, response_status: int, data: JsonDict) -> None:
+        channel = self.make_request(
+            "POST",
+            self.report_path,
+            data,
+            access_token=self.other_user_tok,
+            shorthand=False,
+        )
+        self.assertEqual(response_status, channel.code, msg=channel.result["body"])
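
Note: from a client's point of view, the unstable endpoint added above is a plain authenticated POST whose JSON body must contain a string `reason`; per the tests, it returns 200 on success, 400 for a malformed body, and 404 if the room is unknown. A hedged example call using `requests` (the homeserver URL, access token and room ID below are placeholders):

    import requests

    HOMESERVER = "https://matrix.example.org"   # placeholder
    ACCESS_TOKEN = "syt_redacted_example_token"  # placeholder
    ROOM_ID = "!abc123:example.org"              # placeholder

    resp = requests.post(
        f"{HOMESERVER}/_matrix/client/unstable/org.matrix.msc4151/rooms/{ROOM_ID}/report",
        headers={"Authorization": f"Bearer {ACCESS_TOKEN}"},
        json={"reason": "this makes me sad"},
        timeout=10,
    )
    resp.raise_for_status()  # 200 with an empty JSON object on success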

From 6a860838bb8485dcde4d2aa48115e0a41ded91b1 Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Wed, 12 Jun 2024 13:35:59 +0200
Subject: [PATCH 257/278] Fix the import for RequestBodyModel

This apparently got moved in develop, so cherry-picking the merge
stuff.

This should be reverted once we merge develop again
---
 synapse/rest/client/reporting.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/rest/client/reporting.py b/synapse/rest/client/reporting.py
index a95b83b14d9..555f5656182 100644
--- a/synapse/rest/client/reporting.py
+++ b/synapse/rest/client/reporting.py
@@ -32,8 +32,8 @@
     parse_json_object_from_request,
 )
 from synapse.http.site import SynapseRequest
+from synapse.rest.models import RequestBodyModel
 from synapse.types import JsonDict
-from synapse.types.rest import RequestBodyModel
 
 from ._base import client_patterns
 

From 0b01683e80925aa84fd3b317fc4caee85ee92c9b Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Tue, 18 Jun 2024 16:54:19 +0100
Subject: [PATCH 258/278] Revert "Support MSC3916 by adding a federation
 `/download` endpoint" (#17325)

---
 changelog.d/17172.feature                     |   2 -
 changelog.d/17325.misc                        |   1 +
 .../federation/transport/server/__init__.py   |  24 --
 synapse/federation/transport/server/_base.py  |  24 +-
 .../federation/transport/server/federation.py |  41 ---
 synapse/media/_base.py                        |  63 +----
 synapse/media/media_repository.py             |  18 +-
 synapse/media/media_storage.py                | 223 +----------------
 synapse/media/storage_provider.py             |  40 +--
 tests/federation/test_federation_media.py     | 234 ------------------
 tests/media/test_media_storage.py             |  14 +-
 11 files changed, 25 insertions(+), 659 deletions(-)
 delete mode 100644 changelog.d/17172.feature
 create mode 100644 changelog.d/17325.misc
 delete mode 100644 tests/federation/test_federation_media.py

diff --git a/changelog.d/17172.feature b/changelog.d/17172.feature
deleted file mode 100644
index 245dea815cb..00000000000
--- a/changelog.d/17172.feature
+++ /dev/null
@@ -1,2 +0,0 @@
-Support [MSC3916](https://github.com/matrix-org/matrix-spec-proposals/blob/rav/authentication-for-media/proposals/3916-authentication-for-media.md)
-by adding a federation /download endpoint (#17172).
\ No newline at end of file
diff --git a/changelog.d/17325.misc b/changelog.d/17325.misc
new file mode 100644
index 00000000000..1a4ce7ceecb
--- /dev/null
+++ b/changelog.d/17325.misc
@@ -0,0 +1 @@
+This is a changelog so tests will run.
\ No newline at end of file
diff --git a/synapse/federation/transport/server/__init__.py b/synapse/federation/transport/server/__init__.py
index 266675c9b85..bac569e9770 100644
--- a/synapse/federation/transport/server/__init__.py
+++ b/synapse/federation/transport/server/__init__.py
@@ -19,7 +19,6 @@
 # [This file includes modifications made by New Vector Limited]
 #
 #
-import inspect
 import logging
 from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Type
 
@@ -34,7 +33,6 @@
     FEDERATION_SERVLET_CLASSES,
     FederationAccountStatusServlet,
     FederationUnstableClientKeysClaimServlet,
-    FederationUnstableMediaDownloadServlet,
 )
 from synapse.http.server import HttpServer, JsonResource
 from synapse.http.servlet import (
@@ -317,28 +315,6 @@ def register_servlets(
             ):
                 continue
 
-            if servletclass == FederationUnstableMediaDownloadServlet:
-                if (
-                    not hs.config.server.enable_media_repo
-                    or not hs.config.experimental.msc3916_authenticated_media_enabled
-                ):
-                    continue
-
-                # don't load the endpoint if the storage provider is incompatible
-                media_repo = hs.get_media_repository()
-                load_download_endpoint = True
-                for provider in media_repo.media_storage.storage_providers:
-                    signature = inspect.signature(provider.backend.fetch)
-                    if "federation" not in signature.parameters:
-                        logger.warning(
-                            f"Federation media `/download` endpoint will not be enabled as storage provider {provider.backend} is not compatible with this endpoint."
-                        )
-                        load_download_endpoint = False
-                        break
-
-                if not load_download_endpoint:
-                    continue
-
             servletclass(
                 hs=hs,
                 authenticator=authenticator,
diff --git a/synapse/federation/transport/server/_base.py b/synapse/federation/transport/server/_base.py
index 4e2717b5655..db0f5076a9e 100644
--- a/synapse/federation/transport/server/_base.py
+++ b/synapse/federation/transport/server/_base.py
@@ -360,29 +360,13 @@ async def new_func(
                                     "request"
                                 )
                                 return None
-                            if (
-                                func.__self__.__class__.__name__  # type: ignore
-                                == "FederationUnstableMediaDownloadServlet"
-                            ):
-                                response = await func(
-                                    origin, content, request, *args, **kwargs
-                                )
-                            else:
-                                response = await func(
-                                    origin, content, request.args, *args, **kwargs
-                                )
-                    else:
-                        if (
-                            func.__self__.__class__.__name__  # type: ignore
-                            == "FederationUnstableMediaDownloadServlet"
-                        ):
-                            response = await func(
-                                origin, content, request, *args, **kwargs
-                            )
-                        else:
                             response = await func(
                                 origin, content, request.args, *args, **kwargs
                             )
+                    else:
+                        response = await func(
+                            origin, content, request.args, *args, **kwargs
+                        )
             finally:
                 # if we used the origin's context as the parent, add a new span using
                 # the servlet span as a parent, so that we have a link
diff --git a/synapse/federation/transport/server/federation.py b/synapse/federation/transport/server/federation.py
index 1f02451efa5..a59734785fa 100644
--- a/synapse/federation/transport/server/federation.py
+++ b/synapse/federation/transport/server/federation.py
@@ -44,13 +44,10 @@
 )
 from synapse.http.servlet import (
     parse_boolean_from_args,
-    parse_integer,
     parse_integer_from_args,
     parse_string_from_args,
     parse_strings_from_args,
 )
-from synapse.http.site import SynapseRequest
-from synapse.media._base import DEFAULT_MAX_TIMEOUT_MS, MAXIMUM_ALLOWED_MAX_TIMEOUT_MS
 from synapse.types import JsonDict
 from synapse.util import SYNAPSE_VERSION
 from synapse.util.ratelimitutils import FederationRateLimiter
@@ -790,43 +787,6 @@ async def on_POST(
         return 200, {"account_statuses": statuses, "failures": failures}
 
 
-class FederationUnstableMediaDownloadServlet(BaseFederationServerServlet):
-    """
-    Implementation of new federation media `/download` endpoint outlined in MSC3916. Returns
-    a multipart/form-data response consisting of a JSON object and the requested media
-    item. This endpoint only returns local media.
-    """
-
-    PATH = "/media/download/(?P<media_id>[^/]*)"
-    PREFIX = FEDERATION_UNSTABLE_PREFIX + "/org.matrix.msc3916"
-    RATELIMIT = True
-
-    def __init__(
-        self,
-        hs: "HomeServer",
-        ratelimiter: FederationRateLimiter,
-        authenticator: Authenticator,
-        server_name: str,
-    ):
-        super().__init__(hs, authenticator, ratelimiter, server_name)
-        self.media_repo = self.hs.get_media_repository()
-
-    async def on_GET(
-        self,
-        origin: Optional[str],
-        content: Literal[None],
-        request: SynapseRequest,
-        media_id: str,
-    ) -> None:
-        max_timeout_ms = parse_integer(
-            request, "timeout_ms", default=DEFAULT_MAX_TIMEOUT_MS
-        )
-        max_timeout_ms = min(max_timeout_ms, MAXIMUM_ALLOWED_MAX_TIMEOUT_MS)
-        await self.media_repo.get_local_media(
-            request, media_id, None, max_timeout_ms, federation=True
-        )
-
-
 FEDERATION_SERVLET_CLASSES: Tuple[Type[BaseFederationServlet], ...] = (
     FederationSendServlet,
     FederationEventServlet,
@@ -858,5 +818,4 @@ async def on_GET(
     FederationV1SendKnockServlet,
     FederationMakeKnockServlet,
     FederationAccountStatusServlet,
-    FederationUnstableMediaDownloadServlet,
 )
diff --git a/synapse/media/_base.py b/synapse/media/_base.py
index 19bca94170c..3fbed6062f8 100644
--- a/synapse/media/_base.py
+++ b/synapse/media/_base.py
@@ -25,16 +25,7 @@
 import urllib
 from abc import ABC, abstractmethod
 from types import TracebackType
-from typing import (
-    TYPE_CHECKING,
-    Awaitable,
-    Dict,
-    Generator,
-    List,
-    Optional,
-    Tuple,
-    Type,
-)
+from typing import Awaitable, Dict, Generator, List, Optional, Tuple, Type
 
 import attr
 
@@ -48,11 +39,6 @@
 from synapse.logging.context import make_deferred_yieldable
 from synapse.util.stringutils import is_ascii
 
-if TYPE_CHECKING:
-    from synapse.media.media_storage import MultipartResponder
-    from synapse.storage.databases.main.media_repository import LocalMedia
-
-
 logger = logging.getLogger(__name__)
 
 # list all text content types that will have the charset default to UTF-8 when
@@ -274,53 +260,6 @@ def _can_encode_filename_as_token(x: str) -> bool:
     return True
 
 
-async def respond_with_multipart_responder(
-    request: SynapseRequest,
-    responder: "Optional[MultipartResponder]",
-    media_info: "LocalMedia",
-) -> None:
-    """
-    Responds via a Multipart responder for the federation media `/download` requests
-
-    Args:
-        request: the federation request to respond to
-        responder: the Multipart responder which will send the response
-        media_info: metadata about the media item
-    """
-    if not responder:
-        respond_404(request)
-        return
-
-    # If we have a responder we *must* use it as a context manager.
-    with responder:
-        if request._disconnected:
-            logger.warning(
-                "Not sending response to request %s, already disconnected.", request
-            )
-            return
-
-        logger.debug("Responding to media request with responder %s", responder)
-        if media_info.media_length is not None:
-            request.setHeader(b"Content-Length", b"%d" % (media_info.media_length,))
-        request.setHeader(
-            b"Content-Type", b"multipart/mixed; boundary=%s" % responder.boundary
-        )
-
-        try:
-            await responder.write_to_consumer(request)
-        except Exception as e:
-            # The majority of the time this will be due to the client having gone
-            # away. Unfortunately, Twisted simply throws a generic exception at us
-            # in that case.
-            logger.warning("Failed to write to consumer: %s %s", type(e), e)
-
-            # Unregister the producer, if it has one, so Twisted doesn't complain
-            if request.producer:
-                request.unregisterProducer()
-
-    finish_request(request)
-
-
 async def respond_with_responder(
     request: SynapseRequest,
     responder: "Optional[Responder]",
diff --git a/synapse/media/media_repository.py b/synapse/media/media_repository.py
index c335e518a04..6ed56099ca7 100644
--- a/synapse/media/media_repository.py
+++ b/synapse/media/media_repository.py
@@ -54,11 +54,10 @@
     ThumbnailInfo,
     get_filename_from_headers,
     respond_404,
-    respond_with_multipart_responder,
     respond_with_responder,
 )
 from synapse.media.filepath import MediaFilePaths
-from synapse.media.media_storage import MediaStorage, MultipartResponder
+from synapse.media.media_storage import MediaStorage
 from synapse.media.storage_provider import StorageProviderWrapper
 from synapse.media.thumbnailer import Thumbnailer, ThumbnailError
 from synapse.media.url_previewer import UrlPreviewer
@@ -430,7 +429,6 @@ async def get_local_media(
         media_id: str,
         name: Optional[str],
         max_timeout_ms: int,
-        federation: bool = False,
     ) -> None:
         """Responds to requests for local media, if exists, or returns 404.
 
@@ -442,7 +440,6 @@ async def get_local_media(
                 the filename in the Content-Disposition header of the response.
             max_timeout_ms: the maximum number of milliseconds to wait for the
                 media to be uploaded.
-            federation: whether the local media being fetched is for a federation request
 
         Returns:
             Resolves once a response has successfully been written to request
@@ -462,17 +459,10 @@ async def get_local_media(
 
         file_info = FileInfo(None, media_id, url_cache=bool(url_cache))
 
-        responder = await self.media_storage.fetch_media(
-            file_info, media_info, federation
+        responder = await self.media_storage.fetch_media(file_info)
+        await respond_with_responder(
+            request, responder, media_type, media_length, upload_name
         )
-        if federation:
-            # this really should be a Multipart responder but just in case
-            assert isinstance(responder, MultipartResponder)
-            await respond_with_multipart_responder(request, responder, media_info)
-        else:
-            await respond_with_responder(
-                request, responder, media_type, media_length, upload_name
-            )
 
     async def get_remote_media(
         self,
diff --git a/synapse/media/media_storage.py b/synapse/media/media_storage.py
index 2f55d12b6b5..b3cd3fd8f48 100644
--- a/synapse/media/media_storage.py
+++ b/synapse/media/media_storage.py
@@ -19,12 +19,9 @@
 #
 #
 import contextlib
-import json
 import logging
 import os
 import shutil
-from contextlib import closing
-from io import BytesIO
 from types import TracebackType
 from typing import (
     IO,
@@ -33,19 +30,14 @@
     AsyncIterator,
     BinaryIO,
     Callable,
-    List,
     Optional,
     Sequence,
     Tuple,
     Type,
-    Union,
 )
-from uuid import uuid4
 
 import attr
-from zope.interface import implementer
 
-from twisted.internet import defer, interfaces
 from twisted.internet.defer import Deferred
 from twisted.internet.interfaces import IConsumer
 from twisted.protocols.basic import FileSender
@@ -56,19 +48,15 @@
 from synapse.util import Clock
 from synapse.util.file_consumer import BackgroundFileConsumer
 
-from ..storage.databases.main.media_repository import LocalMedia
-from ..types import JsonDict
 from ._base import FileInfo, Responder
 from .filepath import MediaFilePaths
 
 if TYPE_CHECKING:
-    from synapse.media.storage_provider import StorageProviderWrapper
+    from synapse.media.storage_provider import StorageProvider
     from synapse.server import HomeServer
 
 logger = logging.getLogger(__name__)
 
-CRLF = b"\r\n"
-
 
 class MediaStorage:
     """Responsible for storing/fetching files from local sources.
@@ -85,7 +73,7 @@ def __init__(
         hs: "HomeServer",
         local_media_directory: str,
         filepaths: MediaFilePaths,
-        storage_providers: Sequence["StorageProviderWrapper"],
+        storage_providers: Sequence["StorageProvider"],
     ):
         self.hs = hs
         self.reactor = hs.get_reactor()
@@ -181,23 +169,15 @@ async def store_into_file(
 
             raise e from None
 
-    async def fetch_media(
-        self,
-        file_info: FileInfo,
-        media_info: Optional[LocalMedia] = None,
-        federation: bool = False,
-    ) -> Optional[Responder]:
+    async def fetch_media(self, file_info: FileInfo) -> Optional[Responder]:
         """Attempts to fetch media described by file_info from the local cache
         and configured storage providers.
 
         Args:
-            file_info: Metadata about the media file
-            media_info: Metadata about the media item
-            federation: Whether this file is being fetched for a federation request
+            file_info: Metadata about the media file
 
         Returns:
-            If the file was found returns a Responder (a Multipart Responder if the requested
-            file is for the federation /download endpoint), otherwise None.
+            Returns a Responder if the file was found, otherwise None.
         """
         paths = [self._file_info_to_path(file_info)]
 
@@ -217,19 +197,12 @@ async def fetch_media(
             local_path = os.path.join(self.local_media_directory, path)
             if os.path.exists(local_path):
                 logger.debug("responding with local file %s", local_path)
-                if federation:
-                    assert media_info is not None
-                    boundary = uuid4().hex.encode("ascii")
-                    return MultipartResponder(
-                        open(local_path, "rb"), media_info, boundary
-                    )
-                else:
-                    return FileResponder(open(local_path, "rb"))
+                return FileResponder(open(local_path, "rb"))
             logger.debug("local file %s did not exist", local_path)
 
         for provider in self.storage_providers:
             for path in paths:
-                res: Any = await provider.fetch(path, file_info, media_info, federation)
+                res: Any = await provider.fetch(path, file_info)
                 if res:
                     logger.debug("Streaming %s from %s", path, provider)
                     return res
@@ -343,7 +316,7 @@ class FileResponder(Responder):
     """Wraps an open file that can be sent to a request.
 
     Args:
-        open_file: A file like object to be streamed to the client,
+        open_file: A file like object to be streamed to the client,
             is closed when finished streaming.
     """
 
@@ -364,38 +337,6 @@ def __exit__(
         self.open_file.close()
 
 
-class MultipartResponder(Responder):
-    """Wraps an open file, formats the response according to MSC3916 and sends it to a
-    federation request.
-
-    Args:
-        open_file: A file like object to be streamed to the client,
-            is closed when finished streaming.
-        media_info: metadata about the media item
-        boundary: bytes to use for the multipart response boundary
-    """
-
-    def __init__(self, open_file: IO, media_info: LocalMedia, boundary: bytes) -> None:
-        self.open_file = open_file
-        self.media_info = media_info
-        self.boundary = boundary
-
-    def write_to_consumer(self, consumer: IConsumer) -> Deferred:
-        return make_deferred_yieldable(
-            MultipartFileSender().beginFileTransfer(
-                self.open_file, consumer, self.media_info.media_type, {}, self.boundary
-            )
-        )
-
-    def __exit__(
-        self,
-        exc_type: Optional[Type[BaseException]],
-        exc_val: Optional[BaseException],
-        exc_tb: Optional[TracebackType],
-    ) -> None:
-        self.open_file.close()
-
-
 class SpamMediaException(NotFoundError):
     """The media was blocked by a spam checker, so we simply 404 the request (in
     the same way as if it was quarantined).
@@ -429,151 +370,3 @@ async def write_chunks_to(self, callback: Callable[[bytes], object]) -> None:
 
                 # We yield to the reactor by sleeping for 0 seconds.
                 await self.clock.sleep(0)
-
-
-@implementer(interfaces.IProducer)
-class MultipartFileSender:
-    """
-    A producer that sends the contents of a file to a federation request in the format
-    outlined in MSC3916 - a multipart/format-data response where the first field is a
-    JSON object and the second is the requested file.
-
-    This is a slight re-writing of twisted.protocols.basic.FileSender to achieve the format
-    outlined above.
-    """
-
-    CHUNK_SIZE = 2**14
-
-    lastSent = ""
-    deferred: Optional[defer.Deferred] = None
-
-    def beginFileTransfer(
-        self,
-        file: IO,
-        consumer: IConsumer,
-        file_content_type: str,
-        json_object: JsonDict,
-        boundary: bytes,
-    ) -> Deferred:
-        """
-        Begin transferring a file
-
-        Args:
-            file: The file object to read data from
-            consumer: The synapse request to write the data to
-            file_content_type: The content-type of the file
-            json_object: The JSON object to write to the first field of the response
-            boundary: bytes to be used as the multipart/form-data boundary
-
-        Returns:  A deferred whose callback will be invoked when the file has
-        been completely written to the consumer. The last byte written to the
-        consumer is passed to the callback.
-        """
-        self.file: Optional[IO] = file
-        self.consumer = consumer
-        self.json_field = json_object
-        self.json_field_written = False
-        self.content_type_written = False
-        self.file_content_type = file_content_type
-        self.boundary = boundary
-        self.deferred: Deferred = defer.Deferred()
-        self.consumer.registerProducer(self, False)
-        # while it's not entirely clear why this assignment is necessary, it mirrors
-        # the behavior in FileSender.beginFileTransfer and thus is preserved here
-        deferred = self.deferred
-        return deferred
-
-    def resumeProducing(self) -> None:
-        # write the first field, which will always be a json field
-        if not self.json_field_written:
-            self.consumer.write(CRLF + b"--" + self.boundary + CRLF)
-
-            content_type = Header(b"Content-Type", b"application/json")
-            self.consumer.write(bytes(content_type) + CRLF)
-
-            json_field = json.dumps(self.json_field)
-            json_bytes = json_field.encode("utf-8")
-            self.consumer.write(json_bytes)
-            self.consumer.write(CRLF + b"--" + self.boundary + CRLF)
-
-            self.json_field_written = True
-
-        chunk: Any = ""
-        if self.file:
-            # if we haven't written the content type yet, do so
-            if not self.content_type_written:
-                type = self.file_content_type.encode("utf-8")
-                content_type = Header(b"Content-Type", type)
-                self.consumer.write(bytes(content_type) + CRLF)
-                self.content_type_written = True
-
-            chunk = self.file.read(self.CHUNK_SIZE)
-
-        if not chunk:
-            # we've reached the end of the file
-            self.consumer.write(CRLF + b"--" + self.boundary + b"--" + CRLF)
-            self.file = None
-            self.consumer.unregisterProducer()
-
-            if self.deferred:
-                self.deferred.callback(self.lastSent)
-                self.deferred = None
-            return
-
-        self.consumer.write(chunk)
-        self.lastSent = chunk[-1:]
-
-    def pauseProducing(self) -> None:
-        pass
-
-    def stopProducing(self) -> None:
-        if self.deferred:
-            self.deferred.errback(Exception("Consumer asked us to stop producing"))
-            self.deferred = None
-
-
-class Header:
-    """
-    `Header` This class is a tiny wrapper that produces
-    request headers. We can't use standard python header
-    class because it encodes unicode fields using =? bla bla ?=
-    encoding, which is correct, but no one in HTTP world expects
-    that, everyone wants utf-8 raw bytes. (stolen from treq.multipart)
-
-    """
-
-    def __init__(
-        self,
-        name: bytes,
-        value: Any,
-        params: Optional[List[Tuple[Any, Any]]] = None,
-    ):
-        self.name = name
-        self.value = value
-        self.params = params or []
-
-    def add_param(self, name: Any, value: Any) -> None:
-        self.params.append((name, value))
-
-    def __bytes__(self) -> bytes:
-        with closing(BytesIO()) as h:
-            h.write(self.name + b": " + escape(self.value).encode("us-ascii"))
-            if self.params:
-                for name, val in self.params:
-                    h.write(b"; ")
-                    h.write(escape(name).encode("us-ascii"))
-                    h.write(b"=")
-                    h.write(b'"' + escape(val).encode("utf-8") + b'"')
-            h.seek(0)
-            return h.read()
-
-
-def escape(value: Union[str, bytes]) -> str:
-    """
-    This function prevents header values from corrupting the request,
-    a newline in the file name parameter makes form-data request unreadable
-    for a majority of parsers. (stolen from treq.multipart)
-    """
-    if isinstance(value, bytes):
-        value = value.decode("utf-8")
-    return value.replace("\r", "").replace("\n", "").replace('"', '\\"')
diff --git a/synapse/media/storage_provider.py b/synapse/media/storage_provider.py
index a2d50adf658..06e5d27a53a 100644
--- a/synapse/media/storage_provider.py
+++ b/synapse/media/storage_provider.py
@@ -24,16 +24,14 @@
 import os
 import shutil
 from typing import TYPE_CHECKING, Callable, Optional
-from uuid import uuid4
 
 from synapse.config._base import Config
 from synapse.logging.context import defer_to_thread, run_in_background
 from synapse.logging.opentracing import start_active_span, trace_with_opname
 from synapse.util.async_helpers import maybe_awaitable
 
-from ..storage.databases.main.media_repository import LocalMedia
 from ._base import FileInfo, Responder
-from .media_storage import FileResponder, MultipartResponder
+from .media_storage import FileResponder
 
 logger = logging.getLogger(__name__)
 
@@ -57,21 +55,13 @@ async def store_file(self, path: str, file_info: FileInfo) -> None:
         """
 
     @abc.abstractmethod
-    async def fetch(
-        self,
-        path: str,
-        file_info: FileInfo,
-        media_info: Optional[LocalMedia] = None,
-        federation: bool = False,
-    ) -> Optional[Responder]:
+    async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]:
         """Attempt to fetch the file described by file_info and stream it
         into writer.
 
         Args:
             path: Relative path of file in local cache
             file_info: The metadata of the file.
-            media_info: metadata of the media item
-            federation: Whether the requested media is for a federation request
 
         Returns:
             Returns a Responder if the provider has the file, otherwise returns None.
@@ -134,13 +124,7 @@ async def store() -> None:
             run_in_background(store)
 
     @trace_with_opname("StorageProviderWrapper.fetch")
-    async def fetch(
-        self,
-        path: str,
-        file_info: FileInfo,
-        media_info: Optional[LocalMedia] = None,
-        federation: bool = False,
-    ) -> Optional[Responder]:
+    async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]:
         if file_info.url_cache:
             # Files in the URL preview cache definitely aren't stored here,
             # so avoid any potentially slow I/O or network access.
@@ -148,9 +132,7 @@ async def fetch(
 
         # store_file is supposed to return an Awaitable, but guard
         # against improper implementations.
-        return await maybe_awaitable(
-            self.backend.fetch(path, file_info, media_info, federation)
-        )
+        return await maybe_awaitable(self.backend.fetch(path, file_info))
 
 
 class FileStorageProviderBackend(StorageProvider):
@@ -190,23 +172,11 @@ async def store_file(self, path: str, file_info: FileInfo) -> None:
             )
 
     @trace_with_opname("FileStorageProviderBackend.fetch")
-    async def fetch(
-        self,
-        path: str,
-        file_info: FileInfo,
-        media_info: Optional[LocalMedia] = None,
-        federation: bool = False,
-    ) -> Optional[Responder]:
+    async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]:
         """See StorageProvider.fetch"""
 
         backup_fname = os.path.join(self.base_directory, path)
         if os.path.isfile(backup_fname):
-            if federation:
-                assert media_info is not None
-                boundary = uuid4().hex.encode("ascii")
-                return MultipartResponder(
-                    open(backup_fname, "rb"), media_info, boundary
-                )
             return FileResponder(open(backup_fname, "rb"))
 
         return None
diff --git a/tests/federation/test_federation_media.py b/tests/federation/test_federation_media.py
deleted file mode 100644
index 1c89d19e99e..00000000000
--- a/tests/federation/test_federation_media.py
+++ /dev/null
@@ -1,234 +0,0 @@
-#
-# This file is licensed under the Affero General Public License (AGPL) version 3.
-#
-# Copyright (C) 2024 New Vector, Ltd
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# See the GNU Affero General Public License for more details:
-# <https://www.gnu.org/licenses/agpl-3.0.html>.
-#
-# Originally licensed under the Apache License, Version 2.0:
-# <http://www.apache.org/licenses/LICENSE-2.0>.
-#
-# [This file includes modifications made by New Vector Limited]
-#
-#
-import io
-import os
-import shutil
-import tempfile
-from typing import Optional
-
-from twisted.test.proto_helpers import MemoryReactor
-
-from synapse.media._base import FileInfo, Responder
-from synapse.media.filepath import MediaFilePaths
-from synapse.media.media_storage import MediaStorage
-from synapse.media.storage_provider import (
-    FileStorageProviderBackend,
-    StorageProviderWrapper,
-)
-from synapse.server import HomeServer
-from synapse.storage.databases.main.media_repository import LocalMedia
-from synapse.types import JsonDict, UserID
-from synapse.util import Clock
-
-from tests import unittest
-from tests.test_utils import SMALL_PNG
-from tests.unittest import override_config
-
-
-class FederationUnstableMediaDownloadsTest(unittest.FederatingHomeserverTestCase):
-
-    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
-        super().prepare(reactor, clock, hs)
-        self.test_dir = tempfile.mkdtemp(prefix="synapse-tests-")
-        self.addCleanup(shutil.rmtree, self.test_dir)
-        self.primary_base_path = os.path.join(self.test_dir, "primary")
-        self.secondary_base_path = os.path.join(self.test_dir, "secondary")
-
-        hs.config.media.media_store_path = self.primary_base_path
-
-        storage_providers = [
-            StorageProviderWrapper(
-                FileStorageProviderBackend(hs, self.secondary_base_path),
-                store_local=True,
-                store_remote=False,
-                store_synchronous=True,
-            )
-        ]
-
-        self.filepaths = MediaFilePaths(self.primary_base_path)
-        self.media_storage = MediaStorage(
-            hs, self.primary_base_path, self.filepaths, storage_providers
-        )
-        self.media_repo = hs.get_media_repository()
-
-    @override_config(
-        {"experimental_features": {"msc3916_authenticated_media_enabled": True}}
-    )
-    def test_file_download(self) -> None:
-        content = io.BytesIO(b"file_to_stream")
-        content_uri = self.get_success(
-            self.media_repo.create_content(
-                "text/plain",
-                "test_upload",
-                content,
-                46,
-                UserID.from_string("@user_id:whatever.org"),
-            )
-        )
-        # test with a text file
-        channel = self.make_signed_federation_request(
-            "GET",
-            f"/_matrix/federation/unstable/org.matrix.msc3916/media/download/{content_uri.media_id}",
-        )
-        self.pump()
-        self.assertEqual(200, channel.code)
-
-        content_type = channel.headers.getRawHeaders("content-type")
-        assert content_type is not None
-        assert "multipart/mixed" in content_type[0]
-        assert "boundary" in content_type[0]
-
-        # extract boundary
-        boundary = content_type[0].split("boundary=")[1]
-        # split on boundary and check that json field and expected value exist
-        stripped = channel.text_body.split("\r\n" + "--" + boundary)
-        # TODO: the json object expected will change once MSC3911 is implemented, currently
-        # {} is returned for all requests as a placeholder (per MSC3196)
-        found_json = any(
-            "\r\nContent-Type: application/json\r\n{}" in field for field in stripped
-        )
-        self.assertTrue(found_json)
-
-        # check that text file and expected value exist
-        found_file = any(
-            "\r\nContent-Type: text/plain\r\nfile_to_stream" in field
-            for field in stripped
-        )
-        self.assertTrue(found_file)
-
-        content = io.BytesIO(SMALL_PNG)
-        content_uri = self.get_success(
-            self.media_repo.create_content(
-                "image/png",
-                "test_png_upload",
-                content,
-                67,
-                UserID.from_string("@user_id:whatever.org"),
-            )
-        )
-        # test with an image file
-        channel = self.make_signed_federation_request(
-            "GET",
-            f"/_matrix/federation/unstable/org.matrix.msc3916/media/download/{content_uri.media_id}",
-        )
-        self.pump()
-        self.assertEqual(200, channel.code)
-
-        content_type = channel.headers.getRawHeaders("content-type")
-        assert content_type is not None
-        assert "multipart/mixed" in content_type[0]
-        assert "boundary" in content_type[0]
-
-        # extract boundary
-        boundary = content_type[0].split("boundary=")[1]
-        # split on boundary and check that json field and expected value exist
-        body = channel.result.get("body")
-        assert body is not None
-        stripped_bytes = body.split(b"\r\n" + b"--" + boundary.encode("utf-8"))
-        found_json = any(
-            b"\r\nContent-Type: application/json\r\n{}" in field
-            for field in stripped_bytes
-        )
-        self.assertTrue(found_json)
-
-        # check that png file exists and matches what was uploaded
-        found_file = any(SMALL_PNG in field for field in stripped_bytes)
-        self.assertTrue(found_file)
-
-    @override_config(
-        {"experimental_features": {"msc3916_authenticated_media_enabled": False}}
-    )
-    def test_disable_config(self) -> None:
-        content = io.BytesIO(b"file_to_stream")
-        content_uri = self.get_success(
-            self.media_repo.create_content(
-                "text/plain",
-                "test_upload",
-                content,
-                46,
-                UserID.from_string("@user_id:whatever.org"),
-            )
-        )
-        channel = self.make_signed_federation_request(
-            "GET",
-            f"/_matrix/federation/unstable/org.matrix.msc3916/media/download/{content_uri.media_id}",
-        )
-        self.pump()
-        self.assertEqual(404, channel.code)
-        self.assertEqual(channel.json_body.get("errcode"), "M_UNRECOGNIZED")
-
-
-class FakeFileStorageProviderBackend:
-    """
-    Fake storage provider stub with incompatible `fetch` signature for testing
-    """
-
-    def __init__(self, hs: "HomeServer", config: str):
-        self.hs = hs
-        self.cache_directory = hs.config.media.media_store_path
-        self.base_directory = config
-
-    def __str__(self) -> str:
-        return "FakeFileStorageProviderBackend[%s]" % (self.base_directory,)
-
-    async def fetch(
-        self, path: str, file_info: FileInfo, media_info: Optional[LocalMedia] = None
-    ) -> Optional[Responder]:
-        pass
-
-
-TEST_DIR = tempfile.mkdtemp(prefix="synapse-tests-")
-
-
-class FederationUnstableMediaEndpointCompatibilityTest(
-    unittest.FederatingHomeserverTestCase
-):
-
-    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
-        super().prepare(reactor, clock, hs)
-        self.test_dir = TEST_DIR
-        self.addCleanup(shutil.rmtree, self.test_dir)
-        self.media_repo = hs.get_media_repository()
-
-    def default_config(self) -> JsonDict:
-        config = super().default_config()
-        primary_base_path = os.path.join(TEST_DIR, "primary")
-        config["media_storage_providers"] = [
-            {
-                "module": "tests.federation.test_federation_media.FakeFileStorageProviderBackend",
-                "store_local": "True",
-                "store_remote": "False",
-                "store_synchronous": "False",
-                "config": {"directory": primary_base_path},
-            }
-        ]
-        return config
-
-    @override_config(
-        {"experimental_features": {"msc3916_authenticated_media_enabled": True}}
-    )
-    def test_incompatible_storage_provider_fails_to_load_endpoint(self) -> None:
-        channel = self.make_signed_federation_request(
-            "GET",
-            "/_matrix/federation/unstable/org.matrix.msc3916/media/download/xyz",
-        )
-        self.pump()
-        self.assertEqual(404, channel.code)
-        self.assertEqual(channel.json_body.get("errcode"), "M_UNRECOGNIZED")
diff --git a/tests/media/test_media_storage.py b/tests/media/test_media_storage.py
index 47a89e9c66e..46d20ce7755 100644
--- a/tests/media/test_media_storage.py
+++ b/tests/media/test_media_storage.py
@@ -49,10 +49,7 @@
 from synapse.media._base import FileInfo, ThumbnailInfo
 from synapse.media.filepath import MediaFilePaths
 from synapse.media.media_storage import MediaStorage, ReadableFileWrapper
-from synapse.media.storage_provider import (
-    FileStorageProviderBackend,
-    StorageProviderWrapper,
-)
+from synapse.media.storage_provider import FileStorageProviderBackend
 from synapse.media.thumbnailer import ThumbnailProvider
 from synapse.module_api import ModuleApi
 from synapse.module_api.callbacks.spamchecker_callbacks import load_legacy_spam_checkers
@@ -81,14 +78,7 @@ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
 
         hs.config.media.media_store_path = self.primary_base_path
 
-        storage_providers = [
-            StorageProviderWrapper(
-                FileStorageProviderBackend(hs, self.secondary_base_path),
-                store_local=True,
-                store_remote=False,
-                store_synchronous=True,
-            )
-        ]
+        storage_providers = [FileStorageProviderBackend(hs, self.secondary_base_path)]
 
         self.filepaths = MediaFilePaths(self.primary_base_path)
         self.media_storage = MediaStorage(
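
Note: the revert above leaves fetch_media with a single code path: check the local media directory first, then fall back to each configured storage provider, and wrap whatever is found in a plain file responder. A minimal standalone sketch of that lookup order follows, using illustrative class names rather than Synapse's real MediaStorage/StorageProvider types.

import os
from typing import IO, Optional, Sequence


class SimpleFileResponder:
    """Wraps an open file and closes it when the caller is finished."""

    def __init__(self, open_file: IO[bytes]) -> None:
        self.open_file = open_file

    def __enter__(self) -> IO[bytes]:
        return self.open_file

    def __exit__(self, *exc_info: object) -> None:
        self.open_file.close()


class LocalThenProviderFetcher:
    """Illustrative stand-in for the post-revert fetch behaviour."""

    def __init__(self, local_directory: str, provider_directories: Sequence[str]) -> None:
        self.local_directory = local_directory
        self.provider_directories = provider_directories

    def fetch(self, relative_path: str) -> Optional[SimpleFileResponder]:
        # Prefer the local media directory...
        local_path = os.path.join(self.local_directory, relative_path)
        if os.path.exists(local_path):
            return SimpleFileResponder(open(local_path, "rb"))
        # ...then fall back to each configured provider in turn.
        for base in self.provider_directories:
            backup_path = os.path.join(base, relative_path)
            if os.path.isfile(backup_path):
                return SimpleFileResponder(open(backup_path, "rb"))
        return None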

From b6e944140951ea4ed6e69a672c6bc0273491aa17 Mon Sep 17 00:00:00 2001
From: Olivier 'reivilibre <oliverw@matrix.org>
Date: Tue, 9 Jul 2024 21:25:06 +0100
Subject: [PATCH 259/278] Only log once when waiting for current token to catch
 up

Rather than once every 0.5s
---
 synapse/notifier.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/synapse/notifier.py b/synapse/notifier.py
index c3ecf86ec4f..7a2b54036c3 100644
--- a/synapse/notifier.py
+++ b/synapse/notifier.py
@@ -773,6 +773,7 @@ async def wait_for_stream_token(self, stream_token: StreamToken) -> bool:
         stream_token = await self.event_sources.bound_future_token(stream_token)
 
         start = self.clock.time_msec()
+        logged = False
         while True:
             current_token = self.event_sources.get_current_token()
             if stream_token.is_before_or_eq(current_token):
@@ -783,11 +784,13 @@ async def wait_for_stream_token(self, stream_token: StreamToken) -> bool:
             if now - start > 10_000:
                 return False
 
-            logger.info(
-                "Waiting for current token to reach %s; currently at %s",
-                stream_token,
-                current_token,
-            )
+            if not logged:
+                logger.info(
+                    "Waiting for current token to reach %s; currently at %s",
+                    stream_token,
+                    current_token,
+                )
+                logged = True
 
             # TODO: be better
             await self.clock.sleep(0.5)
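
Note: the patch above keeps the 0.5s polling loop but puts the log line behind a `logged` flag so it fires at most once per wait. A standalone sketch of the same pattern, with the token comparison reduced to plain integers rather than Synapse's StreamToken machinery:

import asyncio
import logging
import time
from typing import Callable

logger = logging.getLogger(__name__)


async def wait_for_token(
    get_current_token: Callable[[], int], target: int, timeout_ms: int = 10_000
) -> bool:
    start = time.monotonic() * 1000
    logged = False
    while True:
        current = get_current_token()
        if target <= current:
            return True
        if time.monotonic() * 1000 - start > timeout_ms:
            return False
        if not logged:
            # Log on the first pass through the loop only, not every 0.5s.
            logger.info(
                "Waiting for current token to reach %s; currently at %s", target, current
            )
            logged = True
        await asyncio.sleep(0.5)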

From b2200961473e855433fb3db2b44910379972c71c Mon Sep 17 00:00:00 2001
From: Olivier 'reivilibre <oliverw@matrix.org>
Date: Tue, 30 Jul 2024 11:51:26 +0100
Subject: [PATCH 260/278] Update locked dependency on Twisted to 24.7.0rc1

Pulls in fix for
https://github.com/twisted/twisted/security/advisories/GHSA-c8m8-j448-xjx7
---
 poetry.lock | 63 +++++++++++++++++++----------------------------------
 1 file changed, 22 insertions(+), 41 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 19393bb6b39..8f64ada3466 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
 
 [[package]]
 name = "annotated-types"
@@ -836,18 +836,21 @@ testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-chec
 
 [[package]]
 name = "incremental"
-version = "22.10.0"
-description = "\"A small library that versions your Python projects.\""
+version = "24.7.2"
+description = "A small library that versions your Python projects."
 optional = false
-python-versions = "*"
+python-versions = ">=3.8"
 files = [
-    {file = "incremental-22.10.0-py2.py3-none-any.whl", hash = "sha256:b864a1f30885ee72c5ac2835a761b8fe8aa9c28b9395cacf27286602688d3e51"},
-    {file = "incremental-22.10.0.tar.gz", hash = "sha256:912feeb5e0f7e0188e6f42241d2f450002e11bbc0937c65865045854c24c0bd0"},
+    {file = "incremental-24.7.2-py3-none-any.whl", hash = "sha256:8cb2c3431530bec48ad70513931a760f446ad6c25e8333ca5d95e24b0ed7b8fe"},
+    {file = "incremental-24.7.2.tar.gz", hash = "sha256:fb4f1d47ee60efe87d4f6f0ebb5f70b9760db2b2574c59c8e8912be4ebd464c9"},
 ]
 
+[package.dependencies]
+setuptools = ">=61.0"
+tomli = {version = "*", markers = "python_version < \"3.11\""}
+
 [package.extras]
-mypy = ["click (>=6.0)", "mypy (==0.812)", "twisted (>=16.4.0)"]
-scripts = ["click (>=6.0)", "twisted (>=16.4.0)"]
+scripts = ["click (>=6.0)"]
 
 [[package]]
 name = "isort"
@@ -2726,13 +2729,13 @@ urllib3 = ">=1.26.0"
 
 [[package]]
 name = "twisted"
-version = "24.3.0"
+version = "24.7.0rc1"
 description = "An asynchronous networking framework written in Python"
 optional = false
 python-versions = ">=3.8.0"
 files = [
-    {file = "twisted-24.3.0-py3-none-any.whl", hash = "sha256:039f2e6a49ab5108abd94de187fa92377abe5985c7a72d68d0ad266ba19eae63"},
-    {file = "twisted-24.3.0.tar.gz", hash = "sha256:6b38b6ece7296b5e122c9eb17da2eeab3d98a198f50ca9efd00fb03e5b4fd4ae"},
+    {file = "twisted-24.7.0rc1-py3-none-any.whl", hash = "sha256:f37d6656fe4e2871fab29d8952ae90bd6ca8b48a9e4dfa1b348f4cd62e6ba0bb"},
+    {file = "twisted-24.7.0rc1.tar.gz", hash = "sha256:bbc4a2193ca34cfa32f626300746698a6d70fcd77d9c0b79a664c347e39634fc"},
 ]
 
 [package.dependencies]
@@ -2741,48 +2744,26 @@ automat = ">=0.8.0"
 constantly = ">=15.1"
 hyperlink = ">=17.1.1"
 idna = {version = ">=2.4", optional = true, markers = "extra == \"tls\""}
-incremental = ">=22.10.0"
+incremental = ">=24.7.0"
 pyopenssl = {version = ">=21.0.0", optional = true, markers = "extra == \"tls\""}
 service-identity = {version = ">=18.1.0", optional = true, markers = "extra == \"tls\""}
-twisted-iocpsupport = {version = ">=1.0.2,<2", markers = "platform_system == \"Windows\""}
 typing-extensions = ">=4.2.0"
 zope-interface = ">=5"
 
 [package.extras]
-all-non-platform = ["twisted[conch,http2,serial,test,tls]", "twisted[conch,http2,serial,test,tls]"]
+all-non-platform = ["appdirs (>=1.4.0)", "appdirs (>=1.4.0)", "bcrypt (>=3.1.3)", "bcrypt (>=3.1.3)", "cryptography (>=3.3)", "cryptography (>=3.3)", "cython-test-exception-raiser (>=1.0.2,<2)", "cython-test-exception-raiser (>=1.0.2,<2)", "h2 (>=3.0,<5.0)", "h2 (>=3.0,<5.0)", "hypothesis (>=6.56)", "hypothesis (>=6.56)", "idna (>=2.4)", "idna (>=2.4)", "priority (>=1.1.0,<2.0)", "priority (>=1.1.0,<2.0)", "pyhamcrest (>=2)", "pyhamcrest (>=2)", "pyopenssl (>=21.0.0)", "pyopenssl (>=21.0.0)", "pyserial (>=3.0)", "pyserial (>=3.0)", "pywin32 (!=226)", "pywin32 (!=226)", "service-identity (>=18.1.0)", "service-identity (>=18.1.0)"]
 conch = ["appdirs (>=1.4.0)", "bcrypt (>=3.1.3)", "cryptography (>=3.3)"]
-dev = ["coverage (>=6b1,<7)", "pyflakes (>=2.2,<3.0)", "python-subunit (>=1.4,<2.0)", "twisted[dev-release]", "twistedchecker (>=0.7,<1.0)"]
+dev = ["coverage (>=7.5,<8.0)", "cython-test-exception-raiser (>=1.0.2,<2)", "hypothesis (>=6.56)", "pydoctor (>=23.9.0,<23.10.0)", "pyflakes (>=2.2,<3.0)", "pyhamcrest (>=2)", "python-subunit (>=1.4,<2.0)", "sphinx (>=6,<7)", "sphinx-rtd-theme (>=1.3,<2.0)", "towncrier (>=23.6,<24.0)", "twistedchecker (>=0.7,<1.0)"]
 dev-release = ["pydoctor (>=23.9.0,<23.10.0)", "pydoctor (>=23.9.0,<23.10.0)", "sphinx (>=6,<7)", "sphinx (>=6,<7)", "sphinx-rtd-theme (>=1.3,<2.0)", "sphinx-rtd-theme (>=1.3,<2.0)", "towncrier (>=23.6,<24.0)", "towncrier (>=23.6,<24.0)"]
-gtk-platform = ["pygobject", "pygobject", "twisted[all-non-platform]", "twisted[all-non-platform]"]
+gtk-platform = ["appdirs (>=1.4.0)", "appdirs (>=1.4.0)", "bcrypt (>=3.1.3)", "bcrypt (>=3.1.3)", "cryptography (>=3.3)", "cryptography (>=3.3)", "cython-test-exception-raiser (>=1.0.2,<2)", "cython-test-exception-raiser (>=1.0.2,<2)", "h2 (>=3.0,<5.0)", "h2 (>=3.0,<5.0)", "hypothesis (>=6.56)", "hypothesis (>=6.56)", "idna (>=2.4)", "idna (>=2.4)", "priority (>=1.1.0,<2.0)", "priority (>=1.1.0,<2.0)", "pygobject", "pygobject", "pyhamcrest (>=2)", "pyhamcrest (>=2)", "pyopenssl (>=21.0.0)", "pyopenssl (>=21.0.0)", "pyserial (>=3.0)", "pyserial (>=3.0)", "pywin32 (!=226)", "pywin32 (!=226)", "service-identity (>=18.1.0)", "service-identity (>=18.1.0)"]
 http2 = ["h2 (>=3.0,<5.0)", "priority (>=1.1.0,<2.0)"]
-macos-platform = ["pyobjc-core", "pyobjc-core", "pyobjc-framework-cfnetwork", "pyobjc-framework-cfnetwork", "pyobjc-framework-cocoa", "pyobjc-framework-cocoa", "twisted[all-non-platform]", "twisted[all-non-platform]"]
-mypy = ["mypy (>=1.8,<2.0)", "mypy-zope (>=1.0.3,<1.1.0)", "twisted[all-non-platform,dev]", "types-pyopenssl", "types-setuptools"]
-osx-platform = ["twisted[macos-platform]", "twisted[macos-platform]"]
+macos-platform = ["appdirs (>=1.4.0)", "appdirs (>=1.4.0)", "bcrypt (>=3.1.3)", "bcrypt (>=3.1.3)", "cryptography (>=3.3)", "cryptography (>=3.3)", "cython-test-exception-raiser (>=1.0.2,<2)", "cython-test-exception-raiser (>=1.0.2,<2)", "h2 (>=3.0,<5.0)", "h2 (>=3.0,<5.0)", "hypothesis (>=6.56)", "hypothesis (>=6.56)", "idna (>=2.4)", "idna (>=2.4)", "priority (>=1.1.0,<2.0)", "priority (>=1.1.0,<2.0)", "pyhamcrest (>=2)", "pyhamcrest (>=2)", "pyobjc-core", "pyobjc-core", "pyobjc-framework-cfnetwork", "pyobjc-framework-cfnetwork", "pyobjc-framework-cocoa", "pyobjc-framework-cocoa", "pyopenssl (>=21.0.0)", "pyopenssl (>=21.0.0)", "pyserial (>=3.0)", "pyserial (>=3.0)", "pywin32 (!=226)", "pywin32 (!=226)", "service-identity (>=18.1.0)", "service-identity (>=18.1.0)"]
+mypy = ["appdirs (>=1.4.0)", "bcrypt (>=3.1.3)", "coverage (>=7.5,<8.0)", "cryptography (>=3.3)", "cython-test-exception-raiser (>=1.0.2,<2)", "h2 (>=3.0,<5.0)", "hypothesis (>=6.56)", "idna (>=2.4)", "mypy (>=1.8,<2.0)", "mypy-zope (>=1.0.3,<1.1.0)", "priority (>=1.1.0,<2.0)", "pydoctor (>=23.9.0,<23.10.0)", "pyflakes (>=2.2,<3.0)", "pyhamcrest (>=2)", "pyopenssl (>=21.0.0)", "pyserial (>=3.0)", "python-subunit (>=1.4,<2.0)", "pywin32 (!=226)", "service-identity (>=18.1.0)", "sphinx (>=6,<7)", "sphinx-rtd-theme (>=1.3,<2.0)", "towncrier (>=23.6,<24.0)", "twistedchecker (>=0.7,<1.0)", "types-pyopenssl", "types-setuptools"]
+osx-platform = ["appdirs (>=1.4.0)", "appdirs (>=1.4.0)", "bcrypt (>=3.1.3)", "bcrypt (>=3.1.3)", "cryptography (>=3.3)", "cryptography (>=3.3)", "cython-test-exception-raiser (>=1.0.2,<2)", "cython-test-exception-raiser (>=1.0.2,<2)", "h2 (>=3.0,<5.0)", "h2 (>=3.0,<5.0)", "hypothesis (>=6.56)", "hypothesis (>=6.56)", "idna (>=2.4)", "idna (>=2.4)", "priority (>=1.1.0,<2.0)", "priority (>=1.1.0,<2.0)", "pyhamcrest (>=2)", "pyhamcrest (>=2)", "pyobjc-core", "pyobjc-core", "pyobjc-framework-cfnetwork", "pyobjc-framework-cfnetwork", "pyobjc-framework-cocoa", "pyobjc-framework-cocoa", "pyopenssl (>=21.0.0)", "pyopenssl (>=21.0.0)", "pyserial (>=3.0)", "pyserial (>=3.0)", "pywin32 (!=226)", "pywin32 (!=226)", "service-identity (>=18.1.0)", "service-identity (>=18.1.0)"]
 serial = ["pyserial (>=3.0)", "pywin32 (!=226)"]
 test = ["cython-test-exception-raiser (>=1.0.2,<2)", "hypothesis (>=6.56)", "pyhamcrest (>=2)"]
 tls = ["idna (>=2.4)", "pyopenssl (>=21.0.0)", "service-identity (>=18.1.0)"]
-windows-platform = ["pywin32 (!=226)", "pywin32 (!=226)", "twisted[all-non-platform]", "twisted[all-non-platform]"]
-
-[[package]]
-name = "twisted-iocpsupport"
-version = "1.0.2"
-description = "An extension for use in the twisted I/O Completion Ports reactor."
-optional = false
-python-versions = "*"
-files = [
-    {file = "twisted-iocpsupport-1.0.2.tar.gz", hash = "sha256:72068b206ee809c9c596b57b5287259ea41ddb4774d86725b19f35bf56aa32a9"},
-    {file = "twisted_iocpsupport-1.0.2-cp310-cp310-win32.whl", hash = "sha256:985c06a33f5c0dae92c71a036d1ea63872ee86a21dd9b01e1f287486f15524b4"},
-    {file = "twisted_iocpsupport-1.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:81b3abe3527b367da0220482820cb12a16c661672b7bcfcde328902890d63323"},
-    {file = "twisted_iocpsupport-1.0.2-cp36-cp36m-win32.whl", hash = "sha256:9dbb8823b49f06d4de52721b47de4d3b3026064ef4788ce62b1a21c57c3fff6f"},
-    {file = "twisted_iocpsupport-1.0.2-cp36-cp36m-win_amd64.whl", hash = "sha256:b9fed67cf0f951573f06d560ac2f10f2a4bbdc6697770113a2fc396ea2cb2565"},
-    {file = "twisted_iocpsupport-1.0.2-cp37-cp37m-win32.whl", hash = "sha256:b76b4eed9b27fd63ddb0877efdd2d15835fdcb6baa745cb85b66e5d016ac2878"},
-    {file = "twisted_iocpsupport-1.0.2-cp37-cp37m-win_amd64.whl", hash = "sha256:851b3735ca7e8102e661872390e3bce88f8901bece95c25a0c8bb9ecb8a23d32"},
-    {file = "twisted_iocpsupport-1.0.2-cp38-cp38-win32.whl", hash = "sha256:bf4133139d77fc706d8f572e6b7d82871d82ec7ef25d685c2351bdacfb701415"},
-    {file = "twisted_iocpsupport-1.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:306becd6e22ab6e8e4f36b6bdafd9c92e867c98a5ce517b27fdd27760ee7ae41"},
-    {file = "twisted_iocpsupport-1.0.2-cp39-cp39-win32.whl", hash = "sha256:3c61742cb0bc6c1ac117a7e5f422c129832f0c295af49e01d8a6066df8cfc04d"},
-    {file = "twisted_iocpsupport-1.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:b435857b9efcbfc12f8c326ef0383f26416272260455bbca2cd8d8eca470c546"},
-    {file = "twisted_iocpsupport-1.0.2-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:7d972cfa8439bdcb35a7be78b7ef86d73b34b808c74be56dfa785c8a93b851bf"},
-]
+windows-platform = ["appdirs (>=1.4.0)", "appdirs (>=1.4.0)", "bcrypt (>=3.1.3)", "bcrypt (>=3.1.3)", "cryptography (>=3.3)", "cryptography (>=3.3)", "cython-test-exception-raiser (>=1.0.2,<2)", "cython-test-exception-raiser (>=1.0.2,<2)", "h2 (>=3.0,<5.0)", "h2 (>=3.0,<5.0)", "hypothesis (>=6.56)", "hypothesis (>=6.56)", "idna (>=2.4)", "idna (>=2.4)", "priority (>=1.1.0,<2.0)", "priority (>=1.1.0,<2.0)", "pyhamcrest (>=2)", "pyhamcrest (>=2)", "pyopenssl (>=21.0.0)", "pyopenssl (>=21.0.0)", "pyserial (>=3.0)", "pyserial (>=3.0)", "pywin32 (!=226)", "pywin32 (!=226)", "pywin32 (!=226)", "pywin32 (!=226)", "service-identity (>=18.1.0)", "service-identity (>=18.1.0)", "twisted-iocpsupport (>=1.0.2)", "twisted-iocpsupport (>=1.0.2)"]
 
 [[package]]
 name = "txredisapi"

From 3b8ed3016356a1250e1b24a3e64f095b3ea0c8b0 Mon Sep 17 00:00:00 2001
From: Olivier 'reivilibre <oliverw@matrix.org>
Date: Tue, 30 Jul 2024 12:20:52 +0100
Subject: [PATCH 261/278] Fix signature and bug in `writeHeaders` in the tests

---
 tests/server.py | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/tests/server.py b/tests/server.py
index f1cd0f76be8..38ca0950736 100644
--- a/tests/server.py
+++ b/tests/server.py
@@ -198,17 +198,35 @@ def code(self) -> int:
     def headers(self) -> Headers:
         if not self.result:
             raise Exception("No result yet.")
-        h = Headers()
-        for i in self.result["headers"]:
-            h.addRawHeader(*i)
+
+        h = self.result["headers"]
+        assert isinstance(h, Headers)
         return h
 
     def writeHeaders(
-        self, version: bytes, code: bytes, reason: bytes, headers: Headers
+        self,
+        version: bytes,
+        code: bytes,
+        reason: bytes,
+        headers: Union[Headers, List[Tuple[bytes, bytes]]],
     ) -> None:
         self.result["version"] = version
         self.result["code"] = code
         self.result["reason"] = reason
+
+        if isinstance(headers, list):
+            # Support prior to Twisted 24.7.0rc1
+            new_headers = Headers()
+            for k, v in headers:
+                assert isinstance(k, bytes), f"key is not of type bytes: {k!r}"
+                assert isinstance(v, bytes), f"value is not of type bytes: {v!r}"
+                new_headers.addRawHeader(k, v)
+            headers = new_headers
+
+        assert isinstance(
+            headers, Headers
+        ), f"headers are of the wrong type: {headers!r}"
+
         self.result["headers"] = headers
 
     def write(self, data: bytes) -> None:
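
Note: Twisted 24.7.0rc1 passes a `Headers` object to `writeHeaders`, where older releases passed a list of `(name, value)` byte tuples; the change above accepts both and normalises to `Headers`. A small standalone sketch of that normalisation, assuming only that Twisted is installed:

from typing import List, Tuple, Union

from twisted.web.http_headers import Headers


def normalise_headers(headers: Union[Headers, List[Tuple[bytes, bytes]]]) -> Headers:
    if isinstance(headers, list):
        # Twisted before 24.7.0rc1 passed raw (name, value) byte pairs.
        normalised = Headers()
        for name, value in headers:
            assert isinstance(name, bytes), f"key is not of type bytes: {name!r}"
            assert isinstance(value, bytes), f"value is not of type bytes: {value!r}"
            normalised.addRawHeader(name, value)
        return normalised
    assert isinstance(headers, Headers), f"headers are of the wrong type: {headers!r}"
    return headers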

From 98876d342f7b49ace4ecbb185d4d72810b38ae7d Mon Sep 17 00:00:00 2001
From: Olivier 'reivilibre <oliverw@matrix.org>
Date: Tue, 30 Jul 2024 12:40:16 +0100
Subject: [PATCH 262/278] Fix tests relying on headers not being Headers

---
 tests/rest/client/test_login.py | 5 ++---
 tests/test_server.py            | 6 +++---
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/tests/rest/client/test_login.py b/tests/rest/client/test_login.py
index 3fb77fd9dde..3a998387197 100644
--- a/tests/rest/client/test_login.py
+++ b/tests/rest/client/test_login.py
@@ -969,9 +969,8 @@ def test_cas_redirect_confirm(self) -> None:
         # Test that the response is HTML.
         self.assertEqual(channel.code, 200, channel.result)
         content_type_header_value = ""
-        for header in channel.result.get("headers", []):
-            if header[0] == b"Content-Type":
-                content_type_header_value = header[1].decode("utf8")
+        for header in channel.headers.getRawHeaders("Content-Type"):
+            content_type_header_value = header
 
         self.assertTrue(content_type_header_value.startswith("text/html"))
 
diff --git a/tests/test_server.py b/tests/test_server.py
index 0910ea5f280..45015ca6533 100644
--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -393,7 +393,7 @@ async def callback(request: SynapseRequest, **kwargs: object) -> None:
 
         self.assertEqual(channel.code, 301)
         headers = channel.result["headers"]
-        location_headers = [v for k, v in headers if k == b"Location"]
+        location_headers = headers.getRawHeaders(b"Location", [])
         self.assertEqual(location_headers, [b"/look/an/eagle"])
 
     def test_redirect_exception_with_cookie(self) -> None:
@@ -416,9 +416,9 @@ async def callback(request: SynapseRequest, **kwargs: object) -> NoReturn:
 
         self.assertEqual(channel.code, 304)
         headers = channel.result["headers"]
-        location_headers = [v for k, v in headers if k == b"Location"]
+        location_headers = headers.getRawHeaders(b"Location", [])
         self.assertEqual(location_headers, [b"/no/over/there"])
-        cookies_headers = [v for k, v in headers if k == b"Set-Cookie"]
+        cookies_headers = headers.getRawHeaders(b"Set-Cookie", [])
         self.assertEqual(cookies_headers, [b"session=yespls"])
 
     def test_head_request(self) -> None:
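
Note: with `channel.headers` now a `twisted.web.http_headers.Headers` instance, the tests read values via `getRawHeaders` instead of iterating raw tuples. A short sketch of that API, runnable against a bare `Headers` object:

from twisted.web.http_headers import Headers

headers = Headers()
headers.addRawHeader(b"Location", b"/look/an/eagle")

# getRawHeaders returns the list of values for a header, or the given default.
assert headers.getRawHeaders(b"Location", []) == [b"/look/an/eagle"]
assert headers.getRawHeaders(b"Set-Cookie", []) == []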

From d216a956b6f8b30be09f0b4952504d2d049b30b5 Mon Sep 17 00:00:00 2001
From: Olivier 'reivilibre <oliverw@matrix.org>
Date: Tue, 30 Jul 2024 12:42:18 +0100
Subject: [PATCH 263/278] Slightly simplify some tests whilst reducing
 'Any'-typing

---
 tests/test_server.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/test_server.py b/tests/test_server.py
index 45015ca6533..9ff2589497c 100644
--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -392,8 +392,7 @@ async def callback(request: SynapseRequest, **kwargs: object) -> None:
         )
 
         self.assertEqual(channel.code, 301)
-        headers = channel.result["headers"]
-        location_headers = headers.getRawHeaders(b"Location", [])
+        location_headers = channel.headers.getRawHeaders(b"Location", [])
         self.assertEqual(location_headers, [b"/look/an/eagle"])
 
     def test_redirect_exception_with_cookie(self) -> None:
@@ -415,7 +414,7 @@ async def callback(request: SynapseRequest, **kwargs: object) -> NoReturn:
         )
 
         self.assertEqual(channel.code, 304)
-        headers = channel.result["headers"]
+        headers = channel.headers
         location_headers = headers.getRawHeaders(b"Location", [])
         self.assertEqual(location_headers, [b"/no/over/there"])
         cookies_headers = headers.getRawHeaders(b"Set-Cookie", [])

From 48d92ed0f6f61113457a713c3cb8ced5aaf06767 Mon Sep 17 00:00:00 2001
From: Olivier 'reivilibre <oliverw@matrix.org>
Date: Tue, 30 Jul 2024 13:08:55 +0100
Subject: [PATCH 264/278] Make HTTP proxy support the changes in Twisted 24.7.0rc1

---
 synapse/http/proxy.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/synapse/http/proxy.py b/synapse/http/proxy.py
index 5b5ded757be..04d8b519fed 100644
--- a/synapse/http/proxy.py
+++ b/synapse/http/proxy.py
@@ -62,6 +62,15 @@
     "Upgrade",
 }
 
+if hasattr(Headers, "_canonicalNameCaps"):
+    # Twisted < 24.7.0rc1
+    _canonicalHeaderName = Headers()._canonicalNameCaps
+else:
+    # Twisted >= 24.7.0rc1
+    # But note that `_encodeName` still exists on prior versions,
+    # it just encodes differently
+    _canonicalHeaderName = Headers()._encodeName
+
 
 def parse_connection_header_value(
     connection_header_value: Optional[bytes],
@@ -85,11 +94,10 @@ def parse_connection_header_value(
         The set of header names that should not be copied over from the remote response.
         The keys are capitalized in canonical capitalization.
     """
-    headers = Headers()
     extra_headers_to_remove: Set[str] = set()
     if connection_header_value:
         extra_headers_to_remove = {
-            headers._canonicalNameCaps(connection_option.strip()).decode("ascii")
+            _canonicalHeaderName(connection_option.strip()).decode("ascii")
             for connection_option in connection_header_value.split(b",")
         }
 

From 48bac0db0d8f4a18806090a8c12c18da26e9e858 Mon Sep 17 00:00:00 2001
From: Olivier 'reivilibre <oliverw@matrix.org>
Date: Tue, 30 Jul 2024 13:34:51 +0100
Subject: [PATCH 265/278] Newsfile

Signed-off-by: Olivier 'reivilibre <oliverw@matrix.org>
---
 changelog.d/17502.bugfix | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/17502.bugfix

diff --git a/changelog.d/17502.bugfix b/changelog.d/17502.bugfix
new file mode 100644
index 00000000000..bf1da4e0444
--- /dev/null
+++ b/changelog.d/17502.bugfix
@@ -0,0 +1 @@
+Upgrade locked dependency on Twisted to 24.7.0rc1.
\ No newline at end of file

From a301c634268947732ff19aa165373dcb2e28a4d7 Mon Sep 17 00:00:00 2001
From: Olivier 'reivilibre <oliverw@matrix.org>
Date: Tue, 30 Jul 2024 14:54:42 +0100
Subject: [PATCH 266/278] Fix and hush Mypy errors

---
 synapse/http/proxy.py           | 2 +-
 synapse/http/server.py          | 4 +++-
 synapse/http/site.py            | 2 +-
 tests/rest/client/test_login.py | 2 +-
 4 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/synapse/http/proxy.py b/synapse/http/proxy.py
index 04d8b519fed..97aa429e7d4 100644
--- a/synapse/http/proxy.py
+++ b/synapse/http/proxy.py
@@ -64,7 +64,7 @@
 
 if hasattr(Headers, "_canonicalNameCaps"):
     # Twisted < 24.7.0rc1
-    _canonicalHeaderName = Headers()._canonicalNameCaps
+    _canonicalHeaderName = Headers()._canonicalNameCaps  # type: ignore[attr-defined]
 else:
     # Twisted >= 24.7.0rc1
     # But note that `_encodeName` still exists on prior versions,
diff --git a/synapse/http/server.py b/synapse/http/server.py
index 211795dc396..0d0c610b284 100644
--- a/synapse/http/server.py
+++ b/synapse/http/server.py
@@ -74,6 +74,7 @@
 from synapse.config.homeserver import HomeServerConfig
 from synapse.logging.context import defer_to_thread, preserve_fn, run_in_background
 from synapse.logging.opentracing import active_span, start_active_span, trace_servlet
+from synapse.types import ISynapseReactor
 from synapse.util import json_encoder
 from synapse.util.caches import intern_dict
 from synapse.util.cancellation import is_function_cancellable
@@ -868,7 +869,8 @@ def encode(opentracing_span: "Optional[opentracing.Span]") -> bytes:
 
     with start_active_span("encode_json_response"):
         span = active_span()
-        json_str = await defer_to_thread(request.reactor, encode, span)
+        reactor: ISynapseReactor = request.reactor  # type: ignore
+        json_str = await defer_to_thread(reactor, encode, span)
 
     _write_bytes_to_request(request, json_str)
 
diff --git a/synapse/http/site.py b/synapse/http/site.py
index a5b5780679a..af169ba51e6 100644
--- a/synapse/http/site.py
+++ b/synapse/http/site.py
@@ -683,7 +683,7 @@ def request_factory(channel: HTTPChannel, queued: bool) -> Request:
         self.access_logger = logging.getLogger(logger_name)
         self.server_version_string = server_version_string.encode("ascii")
 
-    def log(self, request: SynapseRequest) -> None:
+    def log(self, request: SynapseRequest) -> None:  # type: ignore[override]
         pass
 
 
diff --git a/tests/rest/client/test_login.py b/tests/rest/client/test_login.py
index 3a998387197..2b1e44381b6 100644
--- a/tests/rest/client/test_login.py
+++ b/tests/rest/client/test_login.py
@@ -969,7 +969,7 @@ def test_cas_redirect_confirm(self) -> None:
         # Test that the response is HTML.
         self.assertEqual(channel.code, 200, channel.result)
         content_type_header_value = ""
-        for header in channel.headers.getRawHeaders("Content-Type"):
+        for header in channel.headers.getRawHeaders("Content-Type", []):
             content_type_header_value = header
 
         self.assertTrue(content_type_header_value.startswith("text/html"))

From dc03039b707c19c0d35b0241a0bff3b368f2d515 Mon Sep 17 00:00:00 2001
From: Olivier 'reivilibre <oliverw@matrix.org>
Date: Tue, 30 Jul 2024 14:55:27 +0100
Subject: [PATCH 267/278] Change to misc

---
 changelog.d/{17502.bugfix => 17502.misc} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename changelog.d/{17502.bugfix => 17502.misc} (100%)

diff --git a/changelog.d/17502.bugfix b/changelog.d/17502.misc
similarity index 100%
rename from changelog.d/17502.bugfix
rename to changelog.d/17502.misc

From 5e16820e4c62f07883785797b2a0e6d317d40692 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <andrew@amorgan.xyz>
Date: Tue, 13 Aug 2024 18:29:56 +0100
Subject: [PATCH 268/278] Register the media threadpool with our metrics

Otherwise it won't be visible.
---
 synapse/server.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/synapse/server.py b/synapse/server.py
index 8b07bb39a01..d6c9cbdac06 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -124,6 +124,7 @@
 )
 from synapse.http.matrixfederationclient import MatrixFederationHttpClient
 from synapse.media.media_repository import MediaRepository
+from synapse.metrics import register_threadpool
 from synapse.metrics.common_usage_metrics import CommonUsageMetricsManager
 from synapse.module_api import ModuleApi
 from synapse.module_api.callbacks import ModuleApiCallbacks
@@ -959,4 +960,7 @@ def get_media_sender_thread_pool(self) -> ThreadPool:
             "during", "shutdown", media_threadpool.stop
         )
 
+        # Register the threadpool with our metrics.
+        register_threadpool("media", media_threadpool)
+
         return media_threadpool
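
Note: Synapse's `register_threadpool` helper exposes a thread pool's state as Prometheus gauges; without the call above, the media sender pool would not show up in metrics. A rough sketch of what such a registration can look like with `prometheus_client`, using illustrative metric names rather than Synapse's exact ones:

from prometheus_client import Gauge
from twisted.python.threadpool import ThreadPool

threadpool_threads = Gauge(
    "example_threadpool_total_threads", "Total threads in the pool", ["name"]
)
threadpool_working = Gauge(
    "example_threadpool_working_threads", "Threads currently doing work", ["name"]
)


def register_threadpool_metrics(name: str, threadpool: ThreadPool) -> None:
    # Sample the pool lazily so the gauges reflect its state at scrape time.
    threadpool_threads.labels(name).set_function(lambda: len(threadpool.threads))
    threadpool_working.labels(name).set_function(lambda: len(threadpool.working))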

From f0ebe2f2151ecd92e4bc11d75e9082f443f11555 Mon Sep 17 00:00:00 2001
From: Olivier 'reivilibre <oliverw@matrix.org>
Date: Thu, 22 Aug 2024 14:16:10 +0100
Subject: [PATCH 269/278] Remove images from message notification e-mails

---
 synapse/res/templates/notif.html | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/synapse/res/templates/notif.html b/synapse/res/templates/notif.html
index 7d86681fed5..39f05b85a33 100644
--- a/synapse/res/templates/notif.html
+++ b/synapse/res/templates/notif.html
@@ -1,19 +1,6 @@
 {%- for message in notif.messages %}
     <tr class="{{ "historical_message" if message.is_historical else "message" }}">
         <td class="sender_avatar">
-            {%- if loop.index0 == 0 or notif.messages[loop.index0 - 1].sender_name != notif.messages[loop.index0].sender_name %}
-                {%- if message.sender_avatar_url %}
-                    <img alt="" class="sender_avatar" src="{{ message.sender_avatar_url|mxc_to_http(32,32) }}"  />
-                {%- else %}
-                    {%- if message.sender_hash % 3 == 0 %}
-                        <img class="sender_avatar" src="https://riot.im/img/external/avatar-1.png"  />
-                    {%- elif message.sender_hash % 3 == 1 %}
-                        <img class="sender_avatar" src="https://riot.im/img/external/avatar-2.png"  />
-                    {%- else %}
-                        <img class="sender_avatar" src="https://riot.im/img/external/avatar-3.png"  />
-                    {%- endif %}
-                {%- endif %}
-            {%- endif %}
         </td>
         <td class="message_contents">
             {%- if loop.index0 == 0 or notif.messages[loop.index0 - 1].sender_name != notif.messages[loop.index0].sender_name %}
@@ -30,7 +17,7 @@
                     {%- elif message.msgtype == "m.notice" %}
                         {{ message.body_text_html }}
                     {%- elif message.msgtype == "m.image" and message.image_url %}
-                        <img src="{{ message.image_url|mxc_to_http(640, 480, 'scale') }}" />
+                        <span class="filename">{{ message.body_text_plain }} (image)</span>
                     {%- elif message.msgtype == "m.file" %}
                         <span class="filename">{{ message.body_text_plain }}</span>
                     {%- else %}

From add2f8d03ced68ca49348a899eb1ebdff3a5ee54 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <andrew@amorgan.xyz>
Date: Thu, 30 Jan 2025 17:14:21 +0000
Subject: [PATCH 270/278] Move (un)block_room storage functions to
 RoomWorkerStore

This is so workers can call these functions.
---
 synapse/storage/databases/main/room.py | 88 +++++++++++++-------------
 1 file changed, 44 insertions(+), 44 deletions(-)

diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index 2522bebd728..d673adba164 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -1181,6 +1181,50 @@ def _quarantine_media_txn(
 
         return total_media_quarantined
 
+    async def block_room(self, room_id: str, user_id: str) -> None:
+        """Marks the room as blocked.
+
+        Can be called multiple times (though we'll only track the last user to
+        block this room).
+
+        Can be called on a room unknown to this homeserver.
+
+        Args:
+            room_id: Room to block
+            user_id: Who blocked it
+        """
+        await self.db_pool.simple_upsert(
+            table="blocked_rooms",
+            keyvalues={"room_id": room_id},
+            values={},
+            insertion_values={"user_id": user_id},
+            desc="block_room",
+        )
+        await self.db_pool.runInteraction(
+            "block_room_invalidation",
+            self._invalidate_cache_and_stream,
+            self.is_room_blocked,
+            (room_id,),
+        )
+
+    async def unblock_room(self, room_id: str) -> None:
+        """Remove the room from blocking list.
+
+        Args:
+            room_id: Room to unblock
+        """
+        await self.db_pool.simple_delete(
+            table="blocked_rooms",
+            keyvalues={"room_id": room_id},
+            desc="unblock_room",
+        )
+        await self.db_pool.runInteraction(
+            "block_room_invalidation",
+            self._invalidate_cache_and_stream,
+            self.is_room_blocked,
+            (room_id,),
+        )
+
     async def get_rooms_for_retention_period_in_range(
         self, min_ms: Optional[int], max_ms: Optional[int], include_null: bool = False
     ) -> Dict[str, RetentionPolicy]:
@@ -2500,50 +2544,6 @@ async def add_room_report(
         )
         return next_id
 
-    async def block_room(self, room_id: str, user_id: str) -> None:
-        """Marks the room as blocked.
-
-        Can be called multiple times (though we'll only track the last user to
-        block this room).
-
-        Can be called on a room unknown to this homeserver.
-
-        Args:
-            room_id: Room to block
-            user_id: Who blocked it
-        """
-        await self.db_pool.simple_upsert(
-            table="blocked_rooms",
-            keyvalues={"room_id": room_id},
-            values={},
-            insertion_values={"user_id": user_id},
-            desc="block_room",
-        )
-        await self.db_pool.runInteraction(
-            "block_room_invalidation",
-            self._invalidate_cache_and_stream,
-            self.is_room_blocked,
-            (room_id,),
-        )
-
-    async def unblock_room(self, room_id: str) -> None:
-        """Remove the room from blocking list.
-
-        Args:
-            room_id: Room to unblock
-        """
-        await self.db_pool.simple_delete(
-            table="blocked_rooms",
-            keyvalues={"room_id": room_id},
-            desc="unblock_room",
-        )
-        await self.db_pool.runInteraction(
-            "block_room_invalidation",
-            self._invalidate_cache_and_stream,
-            self.is_room_blocked,
-            (room_id,),
-        )
-
     async def clear_partial_state_room(self, room_id: str) -> Optional[int]:
         """Clears the partial state flag for a room.
 

From a65e8640feefb66cc941525813267f0116e5cebc Mon Sep 17 00:00:00 2001
From: Travis Ralston <travisr@element.io>
Date: Thu, 13 Mar 2025 15:01:15 -0600
Subject: [PATCH 271/278] Allow admins to see soft failed events

---
 synapse/visibility.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/synapse/visibility.py b/synapse/visibility.py
index dc7b6e4065e..d064950eb59 100644
--- a/synapse/visibility.py
+++ b/synapse/visibility.py
@@ -48,7 +48,7 @@
 from synapse.storage.controllers import StorageControllers
 from synapse.storage.databases.main import DataStore
 from synapse.synapse_rust.events import event_visible_to_server
-from synapse.types import RetentionPolicy, StateMap, StrCollection, get_domain_from_id
+from synapse.types import RetentionPolicy, StateMap, StrCollection, get_domain_from_id, UserID
 from synapse.types.state import StateFilter
 from synapse.util import Clock
 
@@ -106,9 +106,12 @@ async def filter_events_for_client(
         of `user_id` at each event.
     """
     # Filter out events that have been soft failed so that we don't relay them
-    # to clients.
+    # to clients, unless they're a server admin.
     events_before_filtering = events
-    events = [e for e in events if not e.internal_metadata.is_soft_failed()]
+    if filter_send_to_client and await storage.main.is_server_admin(UserID.from_string(user_id)):
+        events = events_before_filtering
+    else:
+        events = [e for e in events if not e.internal_metadata.is_soft_failed()]
     if len(events_before_filtering) != len(events):
         if filtered_event_logger.isEnabledFor(logging.DEBUG):
             filtered_event_logger.debug(

From f7a92226bccfedc9287b619500086629c9224428 Mon Sep 17 00:00:00 2001
From: Travis Ralston <travisr@element.io>
Date: Thu, 13 Mar 2025 15:05:19 -0600
Subject: [PATCH 272/278] changelog

---
 changelog.d/18238.feature | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/18238.feature

diff --git a/changelog.d/18238.feature b/changelog.d/18238.feature
new file mode 100644
index 00000000000..d89f273fbee
--- /dev/null
+++ b/changelog.d/18238.feature
@@ -0,0 +1 @@
+Server admins will see [soft failed](https://spec.matrix.org/v1.13/server-server-api/#soft-failure) events over the Client-Server API.
\ No newline at end of file

From bf06f4260851a1fbec817a3fb201a674703af648 Mon Sep 17 00:00:00 2001
From: turt2live <1190097+turt2live@users.noreply.github.com>
Date: Thu, 13 Mar 2025 21:07:15 +0000
Subject: [PATCH 273/278] Attempt to fix linting

---
 synapse/visibility.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/synapse/visibility.py b/synapse/visibility.py
index d064950eb59..78d33f278b4 100644
--- a/synapse/visibility.py
+++ b/synapse/visibility.py
@@ -48,7 +48,13 @@
 from synapse.storage.controllers import StorageControllers
 from synapse.storage.databases.main import DataStore
 from synapse.synapse_rust.events import event_visible_to_server
-from synapse.types import RetentionPolicy, StateMap, StrCollection, get_domain_from_id, UserID
+from synapse.types import (
+    RetentionPolicy,
+    StateMap,
+    StrCollection,
+    UserID,
+    get_domain_from_id,
+)
 from synapse.types.state import StateFilter
 from synapse.util import Clock
 
@@ -108,7 +114,9 @@ async def filter_events_for_client(
     # Filter out events that have been soft failed so that we don't relay them
     # to clients, unless they're a server admin.
     events_before_filtering = events
-    if filter_send_to_client and await storage.main.is_server_admin(UserID.from_string(user_id)):
+    if filter_send_to_client and await storage.main.is_server_admin(
+        UserID.from_string(user_id)
+    ):
         events = events_before_filtering
     else:
         events = [e for e in events if not e.internal_metadata.is_soft_failed()]

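Taken together with PATCH 271, the net change to `filter_events_for_client` is the branch sketched below. This is only a restatement of the patched logic for readability; the helper name is illustrative, while `filter_send_to_client`, `storage.main.is_server_admin`, `UserID.from_string`, and `internal_metadata.is_soft_failed()` all come from the diffs above:

```python
from synapse.types import UserID

async def strip_soft_failed_unless_admin(events, storage, user_id: str,
                                          filter_send_to_client: bool):
    # Server admins keep soft-failed events over the Client-Server API
    # (see changelog.d/18238.feature); everyone else has them stripped.
    if filter_send_to_client and await storage.main.is_server_admin(
        UserID.from_string(user_id)
    ):
        return list(events)
    return [e for e in events if not e.internal_metadata.is_soft_failed()]
```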
From 4ac6aa79bd97e2196d72e19ffbb097fa406d5d33 Mon Sep 17 00:00:00 2001
From: Travis Ralston <travisr@element.io>
Date: Thu, 13 Mar 2025 15:08:57 -0600
Subject: [PATCH 274/278] Empty commit to fix CI


From 06bdc98ca23f9e05d5c166cd62a6543cbfc6d997 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <andrew@amorgan.xyz>
Date: Fri, 14 Mar 2025 09:58:41 +0000
Subject: [PATCH 275/278] Bump db txn expected count in relations tests

As we're now performing another db txn to check if the user is an admin.
---
 tests/rest/client/test_relations.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py
index f5a7602d0a5..8f2f44739cb 100644
--- a/tests/rest/client/test_relations.py
+++ b/tests/rest/client/test_relations.py
@@ -1181,7 +1181,7 @@ def assert_annotations(bundled_aggregations: JsonDict) -> None:
                 bundled_aggregations,
             )
 
-        self._test_bundled_aggregations(RelationTypes.REFERENCE, assert_annotations, 6)
+        self._test_bundled_aggregations(RelationTypes.REFERENCE, assert_annotations, 7)
 
     def test_thread(self) -> None:
         """
@@ -1226,21 +1226,21 @@ def assert_thread(bundled_aggregations: JsonDict) -> None:
 
         # The "user" sent the root event and is making queries for the bundled
         # aggregations: they have participated.
-        self._test_bundled_aggregations(RelationTypes.THREAD, _gen_assert(True), 6)
+        self._test_bundled_aggregations(RelationTypes.THREAD, _gen_assert(True), 7)
         # The "user2" sent replies in the thread and is making queries for the
         # bundled aggregations: they have participated.
         #
         # Note that this re-uses some cached values, so the total number of
         # queries is much smaller.
         self._test_bundled_aggregations(
-            RelationTypes.THREAD, _gen_assert(True), 3, access_token=self.user2_token
+            RelationTypes.THREAD, _gen_assert(True), 4, access_token=self.user2_token
         )
 
         # A user with no interactions with the thread: they have not participated.
         user3_id, user3_token = self._create_user("charlie")
         self.helper.join(self.room, user=user3_id, tok=user3_token)
         self._test_bundled_aggregations(
-            RelationTypes.THREAD, _gen_assert(False), 3, access_token=user3_token
+            RelationTypes.THREAD, _gen_assert(False), 4, access_token=user3_token
         )
 
     def test_thread_with_bundled_aggregations_for_latest(self) -> None:
@@ -1287,7 +1287,7 @@ def assert_thread(bundled_aggregations: JsonDict) -> None:
                 bundled_aggregations["latest_event"].get("unsigned"),
             )
 
-        self._test_bundled_aggregations(RelationTypes.THREAD, assert_thread, 6)
+        self._test_bundled_aggregations(RelationTypes.THREAD, assert_thread, 7)
 
     def test_nested_thread(self) -> None:
         """

From 5ced4efe1d9ef45f05a0d013fa509ca8534a7ef2 Mon Sep 17 00:00:00 2001
From: Devon Hudson <devonhudson@librem.one>
Date: Tue, 15 Apr 2025 10:48:32 -0600
Subject: [PATCH 276/278] 1.129.0rc1

---
 CHANGES.md                | 25 +++++++++++++++++++++++++
 changelog.d/18133.misc    |  1 -
 changelog.d/18232.feature |  1 -
 changelog.d/18260.feature |  1 -
 changelog.d/18294.docker  |  1 -
 changelog.d/18334.bugfix  |  1 -
 changelog.d/18335.bugfix  |  1 -
 changelog.d/18337.misc    |  1 -
 changelog.d/18339.bugfix  |  1 -
 debian/changelog          |  6 ++++++
 pyproject.toml            |  2 +-
 11 files changed, 32 insertions(+), 9 deletions(-)
 delete mode 100644 changelog.d/18133.misc
 delete mode 100644 changelog.d/18232.feature
 delete mode 100644 changelog.d/18260.feature
 delete mode 100644 changelog.d/18294.docker
 delete mode 100644 changelog.d/18334.bugfix
 delete mode 100644 changelog.d/18335.bugfix
 delete mode 100644 changelog.d/18337.misc
 delete mode 100644 changelog.d/18339.bugfix

diff --git a/CHANGES.md b/CHANGES.md
index 4ac25a37862..fcd4b0b9afe 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,3 +1,28 @@
+# Synapse 1.129.0rc1 (2025-04-15)
+
+### Features
+
+- Add `passthrough_authorization_parameters` in OIDC configuration to allow to pass parameters to the authorization grant URL. ([\#18232](https://github.com/element-hq/synapse/issues/18232))
+- Add `total_event_count`, `total_message_count`, and `total_e2ee_event_count` fields to the homeserver usage statistics. ([\#18260](https://github.com/element-hq/synapse/issues/18260))
+
+### Bugfixes
+
+- Fix `force_tracing_for_users` config when using delegated auth. ([\#18334](https://github.com/element-hq/synapse/issues/18334))
+- Fix the token introspection cache logging access tokens when MAS integration is in use. ([\#18335](https://github.com/element-hq/synapse/issues/18335))
+- Stop caching introspection failures when delegating auth to MAS. ([\#18339](https://github.com/element-hq/synapse/issues/18339))
+
+### Updates to the Docker image
+
+- Optimize the build of the complement-synapse image. ([\#18294](https://github.com/element-hq/synapse/issues/18294))
+
+### Internal Changes
+
+- Disable statement timeout during room purge. ([\#18133](https://github.com/element-hq/synapse/issues/18133))
+- Add cache to storage functions used to auth requests when using delegated auth. ([\#18337](https://github.com/element-hq/synapse/issues/18337))
+
+
+
+
 # Synapse 1.128.0 (2025-04-08)
 
 No significant changes since 1.128.0rc1.
diff --git a/changelog.d/18133.misc b/changelog.d/18133.misc
deleted file mode 100644
index 151ceb2cab3..00000000000
--- a/changelog.d/18133.misc
+++ /dev/null
@@ -1 +0,0 @@
-Disable statement timeout during room purge.
diff --git a/changelog.d/18232.feature b/changelog.d/18232.feature
deleted file mode 100644
index ba5059ba80a..00000000000
--- a/changelog.d/18232.feature
+++ /dev/null
@@ -1 +0,0 @@
-Add `passthrough_authorization_parameters` in OIDC configuration to allow to pass parameters to the authorization grant URL.
diff --git a/changelog.d/18260.feature b/changelog.d/18260.feature
deleted file mode 100644
index e44e3dc990a..00000000000
--- a/changelog.d/18260.feature
+++ /dev/null
@@ -1 +0,0 @@
-Add `total_event_count`, `total_message_count`, and `total_e2ee_event_count` fields to the homeserver usage statistics.
diff --git a/changelog.d/18294.docker b/changelog.d/18294.docker
deleted file mode 100644
index cc40ca90c0f..00000000000
--- a/changelog.d/18294.docker
+++ /dev/null
@@ -1 +0,0 @@
-Optimize the build of the complement-synapse image.
diff --git a/changelog.d/18334.bugfix b/changelog.d/18334.bugfix
deleted file mode 100644
index d82e522cb89..00000000000
--- a/changelog.d/18334.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix `force_tracing_for_users` config when using delegated auth.
diff --git a/changelog.d/18335.bugfix b/changelog.d/18335.bugfix
deleted file mode 100644
index 50df5a1b1d7..00000000000
--- a/changelog.d/18335.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix the token introspection cache logging access tokens when MAS integration is in use.
\ No newline at end of file
diff --git a/changelog.d/18337.misc b/changelog.d/18337.misc
deleted file mode 100644
index b78276fe765..00000000000
--- a/changelog.d/18337.misc
+++ /dev/null
@@ -1 +0,0 @@
-Add cache to storage functions used to auth requests when using delegated auth.
diff --git a/changelog.d/18339.bugfix b/changelog.d/18339.bugfix
deleted file mode 100644
index 09d6d734200..00000000000
--- a/changelog.d/18339.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Stop caching introspection failures when delegating auth to MAS.
diff --git a/debian/changelog b/debian/changelog
index 56839ac5b46..e9eb3314778 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+matrix-synapse-py3 (1.129.0~rc1) stable; urgency=medium
+
+  * New Synapse release 1.129.0rc1.
+
+ -- Synapse Packaging team <packages@matrix.org>  Tue, 15 Apr 2025 10:47:43 -0600
+
 matrix-synapse-py3 (1.128.0) stable; urgency=medium
 
   * New Synapse release 1.128.0.
diff --git a/pyproject.toml b/pyproject.toml
index 8f8eb46e68b..76a9cf4a702 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -97,7 +97,7 @@ module-name = "synapse.synapse_rust"
 
 [tool.poetry]
 name = "matrix-synapse"
-version = "1.128.0"
+version = "1.129.0rc1"
 description = "Homeserver for the Matrix decentralised comms protocol"
 authors = ["Matrix.org Team and Contributors <packages@matrix.org>"]
 license = "AGPL-3.0-or-later"

From 9b8eebbe4e765409b18c26c0c18d02e27ad79d12 Mon Sep 17 00:00:00 2001
From: Devon Hudson <devonhudson@librem.one>
Date: Tue, 15 Apr 2025 11:12:04 -0600
Subject: [PATCH 277/278] Changelog tweaks

---
 CHANGES.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGES.md b/CHANGES.md
index fcd4b0b9afe..7fcb1fb4c7f 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -2,7 +2,7 @@
 
 ### Features
 
-- Add `passthrough_authorization_parameters` in OIDC configuration to allow to pass parameters to the authorization grant URL. ([\#18232](https://github.com/element-hq/synapse/issues/18232))
+- Add `passthrough_authorization_parameters` in OIDC configuration to allow passing parameters to the authorization grant URL. ([\#18232](https://github.com/element-hq/synapse/issues/18232))
 - Add `total_event_count`, `total_message_count`, and `total_e2ee_event_count` fields to the homeserver usage statistics. ([\#18260](https://github.com/element-hq/synapse/issues/18260))
 
 ### Bugfixes

From d67e9c5367c39b977c881800a2451e5bf0c5b713 Mon Sep 17 00:00:00 2001
From: Devon Hudson <devonhudson@librem.one>
Date: Wed, 16 Apr 2025 07:19:27 -0600
Subject: [PATCH 278/278] Update changelog

---
 CHANGES.md               | 2 ++
 changelog.d/18342.bugfix | 1 -
 changelog.d/18345.bugfix | 1 -
 3 files changed, 2 insertions(+), 2 deletions(-)
 delete mode 100644 changelog.d/18342.bugfix
 delete mode 100644 changelog.d/18345.bugfix

diff --git a/CHANGES.md b/CHANGES.md
index 7fcb1fb4c7f..37ab5faf67f 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -10,6 +10,8 @@
 - Fix `force_tracing_for_users` config when using delegated auth. ([\#18334](https://github.com/element-hq/synapse/issues/18334))
 - Fix the token introspection cache logging access tokens when MAS integration is in use. ([\#18335](https://github.com/element-hq/synapse/issues/18335))
 - Stop caching introspection failures when delegating auth to MAS. ([\#18339](https://github.com/element-hq/synapse/issues/18339))
+- Fix `ExternalIDReuse` exception after migrating to MAS on workers with high traffic. ([\#18342](https://github.com/element-hq/synapse/issues/18342))
+- Fix minor performance regression caused by tracking of room participation. Regressed in v1.128.0. ([\#18345](https://github.com/element-hq/synapse/issues/18345))
 
 ### Updates to the Docker image
 
diff --git a/changelog.d/18342.bugfix b/changelog.d/18342.bugfix
deleted file mode 100644
index 6fa2fa679a9..00000000000
--- a/changelog.d/18342.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix `ExternalIDReuse` exception after migrating to MAS on workers with a high traffic.
diff --git a/changelog.d/18345.bugfix b/changelog.d/18345.bugfix
deleted file mode 100644
index c8a001d4a3f..00000000000
--- a/changelog.d/18345.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix minor performance regression caused by tracking of room participation. Regressed in v1.128.0.