From 505be7886a4d7e457cf03c26667ae651da969d0f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 4 Aug 2022 15:59:12 +0100 Subject: [PATCH 1/4] Update type of `EventContext.rejected` --- changelog.d/13460.misc | 1 + synapse/events/snapshot.py | 7 +++---- synapse/storage/databases/main/events.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) create mode 100644 changelog.d/13460.misc diff --git a/changelog.d/13460.misc b/changelog.d/13460.misc new file mode 100644 index 000000000000..f9e9de219df7 --- /dev/null +++ b/changelog.d/13460.misc @@ -0,0 +1 @@ +Update type of `EventContext.rejected`. diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index b700cbbfa197..d3c8083e4ab8 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -11,11 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import TYPE_CHECKING, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, List, Optional, Tuple import attr from frozendict import frozendict -from typing_extensions import Literal from synapse.appservice import ApplicationService from synapse.events import EventBase @@ -33,7 +32,7 @@ class EventContext: Holds information relevant to persisting an event Attributes: - rejected: A rejection reason if the event was rejected, else False + rejected: A rejection reason if the event was rejected, else None _state_group: The ID of the state group for this event. Note that state events are persisted with a state group which includes the new event, so this is @@ -85,7 +84,7 @@ class EventContext: """ _storage: "StorageControllers" - rejected: Union[Literal[False], str] = False + rejected: Optional[str] = None _state_group: Optional[int] = None state_group_before_event: Optional[int] = None _state_delta_due_to_event: Optional[StateMap[str]] = None diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 1f600f119029..5560b38a4832 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1490,7 +1490,7 @@ def event_dict(event: EventBase) -> JsonDict: event.sender, "url" in event.content and isinstance(event.content["url"], str), event.get_state_key(), - context.rejected or None, + context.rejected, ) for event, context in events_and_contexts ), From b1650034f7499e2fffbcab0948c65f9cee563822 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 4 Aug 2022 15:39:58 +0100 Subject: [PATCH 2/4] Update the rejected state of events during resync Events can be un-rejected or newly-rejected during resync, so ensure we update the database and caches when that happens. --- changelog.d/13459.misc | 1 + .../storage/databases/main/events_worker.py | 55 +++++++++++++++++++ synapse/storage/databases/main/state.py | 5 ++ synapse/storage/state.py | 9 --- 4 files changed, 61 insertions(+), 9 deletions(-) create mode 100644 changelog.d/13459.misc diff --git a/changelog.d/13459.misc b/changelog.d/13459.misc new file mode 100644 index 000000000000..e6082210a0d8 --- /dev/null +++ b/changelog.d/13459.misc @@ -0,0 +1 @@ +Faster joins: update the rejected state of events during de-partial-stating. diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 29c99c635735..65542d20b5f0 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -2139,3 +2139,58 @@ def _get_partial_state_events_batch_txn( (room_id,), ) return [row[0] for row in txn] + + def mark_event_rejected_txn( + self, + txn: LoggingTransaction, + event_id: str, + rejection_reason: Optional[str], + ) -> None: + """Mark an event that was previously accepted as rejected, or vice versa + + This can happen, for example, when resyncing state during a faster join. + + Args: + txn: + event_id: ID of event to update + rejection_reason: reason it has been rejected, or None if it is now accepted + """ + if rejection_reason is None: + logger.info( + "Marking previously-processed event %s as accepted", + event_id, + ) + self.db_pool.simple_delete_txn( + txn, + "rejections", + keyvalues={"event_id": event_id}, + ) + else: + logger.info( + "Marking previously-processed event %s as rejected(%s)", + event_id, + rejection_reason, + ) + self.db_pool.simple_upsert_txn( + txn, + table="rejections", + keyvalues={"event_id": event_id}, + values={ + "reason": rejection_reason, + "last_check": self._clock.time_msec(), + }, + ) + self.db_pool.simple_update_txn( + txn, + table="events", + keyvalues={"event_id": event_id}, + updatevalues={"rejection_reason": rejection_reason}, + ) + + self.invalidate_get_event_cache_after_txn(txn, event_id) + + # TODO(faster_joins): invalidate the cache on workers. Ideally we'd just + # call '_send_invalidation_to_replication', but we actually need the other + # end to call _invalidate_local_event_cache() rather than (just) + # _get_event_cache.invalidate(). + # https://github.com/matrix-org/synapse/issues/12994 diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index f70705a0af19..0b10af0e580e 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -430,6 +430,11 @@ def _update_state_for_partial_state_event_txn( updatevalues={"state_group": state_group}, ) + # the event may now be rejected where it was not before, or vice versa, + # in which case we need to update the rejected flags. + if bool(context.rejected) != (event.rejected_reason is not None): + self.mark_event_rejected_txn(txn, event.event_id, context.rejected) + self.db_pool.simple_delete_one_txn( txn, table="partial_state_events", diff --git a/synapse/storage/state.py b/synapse/storage/state.py index af3bab2c15d9..0004d955b434 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -539,15 +539,6 @@ def must_await_full_state(self, is_mine_id: Callable[[str], bool]) -> bool: is_mine_id: a callable which confirms if a given state_key matches a mxid of a local user """ - - # TODO(faster_joins): it's not entirely clear that this is safe. In particular, - # there may be circumstances in which we return a piece of state that, once we - # resync the state, we discover is invalid. For example: if it turns out that - # the sender of a piece of state wasn't actually in the room, then clearly that - # state shouldn't have been returned. - # We should at least add some tests around this to see what happens. - # https://github.com/matrix-org/synapse/issues/13006 - # if we haven't requested membership events, then it depends on the value of # 'include_others' if EventTypes.Member not in self.types: From d4fd08e370d59224f4c8a7195f4473bd6bfd51ca Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 11 Aug 2022 09:29:13 +0100 Subject: [PATCH 3/4] Update synapse/storage/databases/main/events_worker.py --- synapse/storage/databases/main/events_worker.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 83fa7e751342..716724f07a0d 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -2252,6 +2252,11 @@ def mark_event_rejected_txn( # TODO(faster_joins): invalidate the cache on workers. Ideally we'd just # call '_send_invalidation_to_replication', but we actually need the other - # end to call _invalidate_local_event_cache() rather than (just) + # end to call _invalidate_local_get_event_cache() rather than (just) # _get_event_cache.invalidate(). + # + # One solution might be to (somehow) get the workers to call + # _invalidate_caches_for_event() (though that will invalidate more than + # strictly necessary). + # # https://github.com/matrix-org/synapse/issues/12994 From d5f8cc9b9564cc161be449d034afc20c6e98d4cd Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 11 Aug 2022 09:31:49 +0100 Subject: [PATCH 4/4] lint --- synapse/storage/databases/main/events_worker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 716724f07a0d..b07d812ae26d 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -2255,7 +2255,7 @@ def mark_event_rejected_txn( # end to call _invalidate_local_get_event_cache() rather than (just) # _get_event_cache.invalidate(). # - # One solution might be to (somehow) get the workers to call + # One solution might be to (somehow) get the workers to call # _invalidate_caches_for_event() (though that will invalidate more than # strictly necessary). #