From 7321fe414bea98d17c9b338405dae9fa84335cdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gon=C3=A7alo=20Rica=20Pais=20da=20Silva?= Date: Tue, 14 Nov 2023 12:23:52 +0100 Subject: [PATCH 1/5] Optimise `Entity` with repr align & manual `PartialOrd` --- crates/bevy_ecs/src/entity/mod.rs | 40 ++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/crates/bevy_ecs/src/entity/mod.rs b/crates/bevy_ecs/src/entity/mod.rs index e0bbd568afd80..c9e420decc3f2 100644 --- a/crates/bevy_ecs/src/entity/mod.rs +++ b/crates/bevy_ecs/src/entity/mod.rs @@ -115,10 +115,13 @@ type IdCursor = isize; /// [`EntityCommands`]: crate::system::EntityCommands /// [`Query::get`]: crate::system::Query::get /// [`World`]: crate::world::World -#[derive(Clone, Copy, Eq, Ord, PartialOrd)] +#[derive(Clone, Copy, Eq)] +// Alignment repr necessary to allow LLVM to better output +// optimised codegen for `to_bits`, `PartialEq` and `Ord`. +#[repr(align(8))] pub struct Entity { - generation: u32, index: u32, + generation: u32, } // By not short-circuiting in comparisons, we get better codegen. @@ -126,7 +129,38 @@ pub struct Entity { impl PartialEq for Entity { #[inline] fn eq(&self, other: &Entity) -> bool { - (self.generation == other.generation) & (self.index == other.index) + // By using `to_bits`, the codegen can be optimised out even + // further potentially. Relies on the correct alignment/order of + // `Entity`. + self.to_bits() == other.to_bits() + } +} + +// The macro codegen output is not optimal and can't be optimised as well +// by the compiler. This impl resolves that like with `PartialEq`, but by +// instead relying on the better alignment of `Entity`, and then by +// comparing against the bit representation directly instead of relying on +// the normal field order. +impl PartialOrd for Entity { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + // Make use of our `Ord` impl to ensure optimal codegen output + Some(self.cmp(other)) + } +} + +// The macro codegen output is not optimal and can't be optimised as well +// by the compiler. This impl resolves that like with `PartialEq`, but by +// instead relying on the better alignment of `Entity`, and then by +// comparing against the bit representation directly instead of relying on +// the normal field order. +impl Ord for Entity { + #[inline] + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + // This will result in better codegen for ordering comparisons, plus + // avoids pitfalls with regards to macro codegen relying on property + // position when we want to compare against the bit representation. + self.to_bits().cmp(&other.to_bits()) } } From 59bacbde77ab42023ff5d673e634552611f05ae4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gon=C3=A7alo=20Rica=20Pais=20da=20Silva?= Date: Tue, 14 Nov 2023 13:39:01 +0100 Subject: [PATCH 2/5] Add extra inlining hints for `Entity` --- crates/bevy_ecs/src/entity/mod.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/bevy_ecs/src/entity/mod.rs b/crates/bevy_ecs/src/entity/mod.rs index c9e420decc3f2..84c757f500c99 100644 --- a/crates/bevy_ecs/src/entity/mod.rs +++ b/crates/bevy_ecs/src/entity/mod.rs @@ -179,6 +179,7 @@ pub(crate) enum AllocAtWithoutReplacement { impl Entity { #[cfg(test)] + #[inline] pub(crate) const fn new(index: u32, generation: u32) -> Entity { Entity { index, generation } } @@ -231,6 +232,7 @@ impl Entity { /// In general, one should not try to synchronize the ECS by attempting to ensure that /// `Entity` lines up between instances, but instead insert a secondary identifier as /// a component. + #[inline] pub const fn from_raw(index: u32) -> Entity { Entity { index, @@ -244,6 +246,7 @@ impl Entity { /// for serialization between runs. /// /// No particular structure is guaranteed for the returned bits. + #[inline] pub const fn to_bits(self) -> u64 { (self.generation as u64) << 32 | self.index as u64 } @@ -251,6 +254,7 @@ impl Entity { /// Reconstruct an `Entity` previously destructured with [`Entity::to_bits`]. /// /// Only useful when applied to results from `to_bits` in the same instance of an application. + #[inline] pub const fn from_bits(bits: u64) -> Self { Self { generation: (bits >> 32) as u32, From 39d9259dd92e84fa5a9d6a133c51778f5a51014c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gon=C3=A7alo=20Rica=20Pais=20da=20Silva?= Date: Tue, 14 Nov 2023 13:50:43 +0100 Subject: [PATCH 3/5] Add issue ref to LLVM output bug for `PartialOrd` --- crates/bevy_ecs/src/entity/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/bevy_ecs/src/entity/mod.rs b/crates/bevy_ecs/src/entity/mod.rs index 84c757f500c99..c5d388686116f 100644 --- a/crates/bevy_ecs/src/entity/mod.rs +++ b/crates/bevy_ecs/src/entity/mod.rs @@ -118,6 +118,7 @@ type IdCursor = isize; #[derive(Clone, Copy, Eq)] // Alignment repr necessary to allow LLVM to better output // optimised codegen for `to_bits`, `PartialEq` and `Ord`. +// See #[repr(align(8))] pub struct Entity { index: u32, From a99626af2faf4b574e89367bb76f0d92db35ba98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gon=C3=A7alo=20Rica=20Pais=20da=20Silva?= Date: Tue, 14 Nov 2023 14:54:12 +0100 Subject: [PATCH 4/5] Comment on `Entity` field order --- crates/bevy_ecs/src/entity/mod.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/bevy_ecs/src/entity/mod.rs b/crates/bevy_ecs/src/entity/mod.rs index c5d388686116f..921beded4f2e1 100644 --- a/crates/bevy_ecs/src/entity/mod.rs +++ b/crates/bevy_ecs/src/entity/mod.rs @@ -121,6 +121,8 @@ type IdCursor = isize; // See #[repr(align(8))] pub struct Entity { + // The field order below is necessary for rust to + // optimise better how it handles `Entity` index: u32, generation: u32, } From b025e554c0e46b855ea4bbba5e289da1fb4e330b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gon=C3=A7alo=20Rica=20Pais=20da=20Silva?= Date: Wed, 15 Nov 2023 08:46:55 +0100 Subject: [PATCH 5/5] Endian repr, comment cleanup and clarifications --- crates/bevy_ecs/src/entity/mod.rs | 45 +++++++++++++++++-------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/crates/bevy_ecs/src/entity/mod.rs b/crates/bevy_ecs/src/entity/mod.rs index 921beded4f2e1..0723204d70dd0 100644 --- a/crates/bevy_ecs/src/entity/mod.rs +++ b/crates/bevy_ecs/src/entity/mod.rs @@ -115,16 +115,18 @@ type IdCursor = isize; /// [`EntityCommands`]: crate::system::EntityCommands /// [`Query::get`]: crate::system::Query::get /// [`World`]: crate::world::World -#[derive(Clone, Copy, Eq)] +#[derive(Clone, Copy)] // Alignment repr necessary to allow LLVM to better output // optimised codegen for `to_bits`, `PartialEq` and `Ord`. -// See -#[repr(align(8))] +#[repr(C, align(8))] pub struct Entity { - // The field order below is necessary for rust to - // optimise better how it handles `Entity` + // Do not reorder the fields here. The ordering is explicitly used by repr(C) + // to make this struct equivalent to a u64. + #[cfg(target_endian = "little")] index: u32, generation: u32, + #[cfg(target_endian = "big")] + index: u32, } // By not short-circuiting in comparisons, we get better codegen. @@ -133,17 +135,20 @@ impl PartialEq for Entity { #[inline] fn eq(&self, other: &Entity) -> bool { // By using `to_bits`, the codegen can be optimised out even - // further potentially. Relies on the correct alignment/order of - // `Entity`. + // further potentially. Relies on the correct alignment/field + // order of `Entity`. self.to_bits() == other.to_bits() } } -// The macro codegen output is not optimal and can't be optimised as well -// by the compiler. This impl resolves that like with `PartialEq`, but by -// instead relying on the better alignment of `Entity`, and then by -// comparing against the bit representation directly instead of relying on -// the normal field order. +impl Eq for Entity {} + +// The derive macro codegen output is not optimal and can't be optimised as well +// by the compiler. This impl resolves the issue of non-optimal codegen by relying +// on comparing against the bit representation of `Entity` instead of comparing +// the fields. The result is then LLVM is able to optimise the codegen for Entity +// far beyond what the derive macro can. +// See impl PartialOrd for Entity { #[inline] fn partial_cmp(&self, other: &Self) -> Option { @@ -152,11 +157,12 @@ impl PartialOrd for Entity { } } -// The macro codegen output is not optimal and can't be optimised as well -// by the compiler. This impl resolves that like with `PartialEq`, but by -// instead relying on the better alignment of `Entity`, and then by -// comparing against the bit representation directly instead of relying on -// the normal field order. +// The derive macro codegen output is not optimal and can't be optimised as well +// by the compiler. This impl resolves the issue of non-optimal codegen by relying +// on comparing against the bit representation of `Entity` instead of comparing +// the fields. The result is then LLVM is able to optimise the codegen for Entity +// far beyond what the derive macro can. +// See impl Ord for Entity { #[inline] fn cmp(&self, other: &Self) -> std::cmp::Ordering { @@ -182,7 +188,6 @@ pub(crate) enum AllocAtWithoutReplacement { impl Entity { #[cfg(test)] - #[inline] pub(crate) const fn new(index: u32, generation: u32) -> Entity { Entity { index, generation } } @@ -249,7 +254,7 @@ impl Entity { /// for serialization between runs. /// /// No particular structure is guaranteed for the returned bits. - #[inline] + #[inline(always)] pub const fn to_bits(self) -> u64 { (self.generation as u64) << 32 | self.index as u64 } @@ -257,7 +262,7 @@ impl Entity { /// Reconstruct an `Entity` previously destructured with [`Entity::to_bits`]. /// /// Only useful when applied to results from `to_bits` in the same instance of an application. - #[inline] + #[inline(always)] pub const fn from_bits(bits: u64) -> Self { Self { generation: (bits >> 32) as u32,