From a308d99f30d7e660115e355c54c37ac149cdbe53 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 13 Aug 2024 12:27:42 -0500 Subject: [PATCH] Sliding Sync: Exclude partially stated rooms if we must await full state (#17538) Previously, we just had very basic partial room exclusion based on whether we were lazy-loading room members. Now with this PR, we added `must_await_full_state(...)` with rules to check if we have a we're only requesting `required_state` which is completely satisfied even with partial state. Partially-stated rooms should have all state events except for remote membership events so if we require a remote membership event anywhere, then we need to return `True`. --- changelog.d/17538.bugfix | 1 + synapse/handlers/sliding_sync.py | 104 ++++++++-- .../sliding_sync/test_rooms_required_state.py | 195 +++++++++++++++--- 3 files changed, 255 insertions(+), 45 deletions(-) create mode 100644 changelog.d/17538.bugfix diff --git a/changelog.d/17538.bugfix b/changelog.d/17538.bugfix new file mode 100644 index 00000000000..9e4e31dbdb0 --- /dev/null +++ b/changelog.d/17538.bugfix @@ -0,0 +1 @@ +Better exclude partially stated rooms if we must await full state in experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint. diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 18a96843bee..99510254f31 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -24,6 +24,7 @@ from typing import ( TYPE_CHECKING, Any, + Callable, Dict, Final, List, @@ -366,6 +367,73 @@ def combine_room_sync_config( else: self.required_state_map[state_type].add(state_key) + def must_await_full_state( + self, + is_mine_id: Callable[[str], bool], + ) -> bool: + """ + Check if we have a we're only requesting `required_state` which is completely + satisfied even with partial state, then we don't need to `await_full_state` before + we can return it. + + Also see `StateFilter.must_await_full_state(...)` for comparison + + Partially-stated rooms should have all state events except for remote membership + events so if we require a remote membership event anywhere, then we need to + return `True` (requires full state). + + Args: + is_mine_id: a callable which confirms if a given state_key matches a mxid + of a local user + """ + wildcard_state_keys = self.required_state_map.get(StateValues.WILDCARD) + # Requesting *all* state in the room so we have to wait + if ( + wildcard_state_keys is not None + and StateValues.WILDCARD in wildcard_state_keys + ): + return True + + # If the wildcards don't refer to remote user IDs, then we don't need to wait + # for full state. + if wildcard_state_keys is not None: + for possible_user_id in wildcard_state_keys: + if not possible_user_id[0].startswith(UserID.SIGIL): + # Not a user ID + continue + + localpart_hostname = possible_user_id.split(":", 1) + if len(localpart_hostname) < 2: + # Not a user ID + continue + + if not is_mine_id(possible_user_id): + return True + + membership_state_keys = self.required_state_map.get(EventTypes.Member) + # We aren't requesting any membership events at all so the partial state will + # cover us. + if membership_state_keys is None: + return False + + # If we're requesting entirely local users, the partial state will cover us. + for user_id in membership_state_keys: + if user_id == StateValues.ME: + continue + # We're lazy-loading membership so we can just return the state we have. + # Lazy-loading means we include membership for any event `sender` in the + # timeline but since we had to auth those timeline events, we will have the + # membership state for them (including from remote senders). + elif user_id == StateValues.LAZY: + continue + elif user_id == StateValues.WILDCARD: + return False + elif not is_mine_id(user_id): + return True + + # Local users only so the partial state will cover us. + return False + class StateValues: """ @@ -395,6 +463,7 @@ def __init__(self, hs: "HomeServer"): self.device_handler = hs.get_device_handler() self.push_rules_handler = hs.get_push_rules_handler() self.rooms_to_exclude_globally = hs.config.server.rooms_to_exclude_from_sync + self.is_mine_id = hs.is_mine_id self.connection_store = SlidingSyncConnectionStore() @@ -575,19 +644,10 @@ async def current_sync_for_user( # Since creating the `RoomSyncConfig` takes some work, let's just do it # once and make a copy whenever we need it. room_sync_config = RoomSyncConfig.from_room_config(list_config) - membership_state_keys = room_sync_config.required_state_map.get( - EventTypes.Member - ) - # Also see `StateFilter.must_await_full_state(...)` for comparison - lazy_loading = ( - membership_state_keys is not None - and StateValues.LAZY in membership_state_keys - ) - if not lazy_loading: - # Exclude partially-stated rooms unless the `required_state` - # only has `["m.room.member", "$LAZY"]` for membership - # (lazy-loading room members). + # Exclude partially-stated rooms if we must wait for the room to be + # fully-stated + if room_sync_config.must_await_full_state(self.is_mine_id): filtered_sync_room_map = { room_id: room for room_id, room in filtered_sync_room_map.items() @@ -654,6 +714,12 @@ async def current_sync_for_user( # Handle room subscriptions if has_room_subscriptions and sync_config.room_subscriptions is not None: with start_active_span("assemble_room_subscriptions"): + # Find which rooms are partially stated and may need to be filtered out + # depending on the `required_state` requested (see below). + partial_state_room_map = await self.store.is_partial_state_room_batched( + sync_config.room_subscriptions.keys() + ) + for ( room_id, room_subscription, @@ -677,12 +743,20 @@ async def current_sync_for_user( ) # Take the superset of the `RoomSyncConfig` for each room. - # - # Update our `relevant_room_map` with the room we're going to display - # and need to fetch more info about. room_sync_config = RoomSyncConfig.from_room_config( room_subscription ) + + # Exclude partially-stated rooms if we must wait for the room to be + # fully-stated + if room_sync_config.must_await_full_state(self.is_mine_id): + if partial_state_room_map.get(room_id): + continue + + all_rooms.add(room_id) + + # Update our `relevant_room_map` with the room we're going to display + # and need to fetch more info about. existing_room_sync_config = relevant_room_map.get(room_id) if existing_room_sync_config is not None: existing_room_sync_config.combine_room_sync_config( diff --git a/tests/rest/client/sliding_sync/test_rooms_required_state.py b/tests/rest/client/sliding_sync/test_rooms_required_state.py index a13cad223f4..823e7db569d 100644 --- a/tests/rest/client/sliding_sync/test_rooms_required_state.py +++ b/tests/rest/client/sliding_sync/test_rooms_required_state.py @@ -631,8 +631,7 @@ def test_rooms_required_state_combine_superset(self) -> None: def test_rooms_required_state_partial_state(self) -> None: """ - Test partially-stated room are excluded unless `rooms.required_state` is - lazy-loading room members. + Test partially-stated room are excluded if they require full state. """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -649,43 +648,153 @@ def test_rooms_required_state_partial_state(self) -> None: mark_event_as_partial_state(self.hs, join_response2["event_id"], room_id2) ) - # Make the Sliding Sync request (NOT lazy-loading room members) + # Make the Sliding Sync request with examples where `must_await_full_state()` is + # `False` sync_body = { "lists": { - "foo-list": { + "no-state-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": 0, + }, + "other-state-list": { "ranges": [[0, 1]], "required_state": [ [EventTypes.Create, ""], ], "timeline_limit": 0, }, + "lazy-load-list": { + "ranges": [[0, 1]], + "required_state": [ + [EventTypes.Create, ""], + # Lazy-load room members + [EventTypes.Member, StateValues.LAZY], + # Local member + [EventTypes.Member, user2_id], + ], + "timeline_limit": 0, + }, + "local-members-only-list": { + "ranges": [[0, 1]], + "required_state": [ + # Own user ID + [EventTypes.Member, user1_id], + # Local member + [EventTypes.Member, user2_id], + ], + "timeline_limit": 0, + }, + "me-list": { + "ranges": [[0, 1]], + "required_state": [ + # Own user ID + [EventTypes.Member, StateValues.ME], + # Local member + [EventTypes.Member, user2_id], + ], + "timeline_limit": 0, + }, + "wildcard-type-local-state-key-list": { + "ranges": [[0, 1]], + "required_state": [ + ["*", user1_id], + # Not a user ID + ["*", "foobarbaz"], + # Not a user ID + ["*", "foo.bar.baz"], + # Not a user ID + ["*", "@foo"], + ], + "timeline_limit": 0, + }, } } response_body, _ = self.do_sync(sync_body, tok=user1_tok) - # Make sure the list includes room1 but room2 is excluded because it's still - # partially-stated - self.assertListEqual( - list(response_body["lists"]["foo-list"]["ops"]), - [ - { - "op": "SYNC", - "range": [0, 1], - "room_ids": [room_id1], + # The list should include both rooms now because we don't need full state + for list_key in response_body["lists"].keys(): + self.assertIncludes( + set(response_body["lists"][list_key]["ops"][0]["room_ids"]), + {room_id2, room_id1}, + exact=True, + message=f"Expected all rooms to show up for list_key={list_key}. Response " + + str(response_body["lists"][list_key]), + ) + + # Take each of the list variants and apply them to room subscriptions to make + # sure the same rules apply + for list_key in sync_body["lists"].keys(): + sync_body_for_subscriptions = { + "room_subscriptions": { + room_id1: { + "required_state": sync_body["lists"][list_key][ + "required_state" + ], + "timeline_limit": 0, + }, + room_id2: { + "required_state": sync_body["lists"][list_key][ + "required_state" + ], + "timeline_limit": 0, + }, } - ], - response_body["lists"]["foo-list"], - ) + } + response_body, _ = self.do_sync(sync_body_for_subscriptions, tok=user1_tok) + + self.assertIncludes( + set(response_body["rooms"].keys()), + {room_id2, room_id1}, + exact=True, + message=f"Expected all rooms to show up for test_key={list_key}.", + ) - # Make the Sliding Sync request (with lazy-loading room members) + # ===================================================================== + + # Make the Sliding Sync request with examples where `must_await_full_state()` is + # `True` sync_body = { "lists": { - "foo-list": { + "wildcard-list": { + "ranges": [[0, 1]], + "required_state": [ + ["*", "*"], + ], + "timeline_limit": 0, + }, + "wildcard-type-remote-state-key-list": { + "ranges": [[0, 1]], + "required_state": [ + ["*", "@some:remote"], + # Not a user ID + ["*", "foobarbaz"], + # Not a user ID + ["*", "foo.bar.baz"], + # Not a user ID + ["*", "@foo"], + ], + "timeline_limit": 0, + }, + "remote-member-list": { + "ranges": [[0, 1]], + "required_state": [ + # Own user ID + [EventTypes.Member, user1_id], + # Remote member + [EventTypes.Member, "@some:remote"], + # Local member + [EventTypes.Member, user2_id], + ], + "timeline_limit": 0, + }, + "lazy-but-remote-member-list": { "ranges": [[0, 1]], "required_state": [ - [EventTypes.Create, ""], # Lazy-load room members [EventTypes.Member, StateValues.LAZY], + # Remote member + [EventTypes.Member, "@some:remote"], ], "timeline_limit": 0, }, @@ -693,15 +802,41 @@ def test_rooms_required_state_partial_state(self) -> None: } response_body, _ = self.do_sync(sync_body, tok=user1_tok) - # The list should include both rooms now because we're lazy-loading room members - self.assertListEqual( - list(response_body["lists"]["foo-list"]["ops"]), - [ - { - "op": "SYNC", - "range": [0, 1], - "room_ids": [room_id2, room_id1], + # Make sure the list includes room1 but room2 is excluded because it's still + # partially-stated + for list_key in response_body["lists"].keys(): + self.assertIncludes( + set(response_body["lists"][list_key]["ops"][0]["room_ids"]), + {room_id1}, + exact=True, + message=f"Expected only fully-stated rooms to show up for list_key={list_key}. Response " + + str(response_body["lists"][list_key]), + ) + + # Take each of the list variants and apply them to room subscriptions to make + # sure the same rules apply + for list_key in sync_body["lists"].keys(): + sync_body_for_subscriptions = { + "room_subscriptions": { + room_id1: { + "required_state": sync_body["lists"][list_key][ + "required_state" + ], + "timeline_limit": 0, + }, + room_id2: { + "required_state": sync_body["lists"][list_key][ + "required_state" + ], + "timeline_limit": 0, + }, } - ], - response_body["lists"]["foo-list"], - ) + } + response_body, _ = self.do_sync(sync_body_for_subscriptions, tok=user1_tok) + + self.assertIncludes( + set(response_body["rooms"].keys()), + {room_id1}, + exact=True, + message=f"Expected only fully-stated rooms to show up for test_key={list_key}.", + )