From d832ea3ecf6242564e0dc35b98b6a409b1d3b7cf Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 13 May 2022 14:17:01 -0700 Subject: [PATCH 1/6] Use intrinsic for genLog2() --- src/coreclr/inc/bitposition.h | 50 +++++++++++++---------------------- src/coreclr/jit/compiler.hpp | 45 +++---------------------------- 2 files changed, 21 insertions(+), 74 deletions(-) diff --git a/src/coreclr/inc/bitposition.h b/src/coreclr/inc/bitposition.h index e1f05ffe9ca03d..18af056a500c83 100644 --- a/src/coreclr/inc/bitposition.h +++ b/src/coreclr/inc/bitposition.h @@ -12,46 +12,32 @@ // // Notes: // 'value' must have exactly one bit set. -// The algorithm is as follows: -// - PRIME is a prime bigger than sizeof(unsigned int), which is not of the -// form 2^n-1. -// - Taking the modulo of 'value' with this will produce a unique hash for all -// powers of 2 (which is what "value" is). -// - Entries in hashTable[] which are -1 should never be used. There -// should be PRIME-8*sizeof(value) entries which are -1 . // inline unsigned BitPosition(unsigned value) { _ASSERTE((value != 0) && ((value & (value-1)) == 0)); -#if defined(HOST_ARM) && defined(__llvm__) - // use intrinsic functions for arm32 - // this is applied for LLVM only but it may work for some compilers - DWORD index = __builtin_clz(__builtin_arm_rbit(value)); -#elif !defined(HOST_AMD64) - const unsigned PRIME = 37; - - static const char hashTable[PRIME] = - { - -1, 0, 1, 26, 2, 23, 27, -1, 3, 16, - 24, 30, 28, 11, -1, 13, 4, 7, 17, -1, - 25, 22, 31, 15, 29, 10, 12, 6, -1, 21, - 14, 9, 5, 20, 8, 19, 18 - }; - - _ASSERTE(PRIME >= 8*sizeof(value)); - _ASSERTE(sizeof(hashTable) == PRIME); + DWORD index; + BitScanForward(&index, value); + return index; +} - unsigned hash = value % PRIME; - unsigned index = hashTable[hash]; - _ASSERTE(index != (unsigned char)-1); -#else - // not enabled for x86 because BSF is extremely slow on Atom - // (15 clock cycles vs 1-3 on any other Intel CPU post-P4) +//------------------------------------------------------------------------ +// BitPosition: Return the position of the single bit that is set in 'value'. +// +// Return Value: +// The position (0 is LSB) of bit that is set in 'value' +// +// Notes: +// 'value' must have exactly one bit set. +// +inline +unsigned BitPosition(unsigned __int64 value) +{ + _ASSERTE((value != 0) && ((value & (value-1)) == 0)); DWORD index; - BitScanForward(&index, value); -#endif + BitScanForward64(&index, value); return index; } diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index e610096100f253..b1a2b847a07dea 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -101,39 +101,12 @@ inline T genFindLowestBit(T value) /***************************************************************************** * * Return the highest bit that is set (that is, a mask that includes just the highest bit). - * TODO-ARM64-Throughput: we should convert these to use the _BitScanReverse() / _BitScanReverse64() - * compiler intrinsics, but our CRT header file intrin.h doesn't define these for ARM64 yet. */ - -inline unsigned int genFindHighestBit(unsigned int mask) -{ - assert(mask != 0); - unsigned int bit = 1U << ((sizeof(unsigned int) * 8) - 1); // start looking at the top - while ((bit & mask) == 0) - { - bit >>= 1; - } - return bit; -} - -inline unsigned __int64 genFindHighestBit(unsigned __int64 mask) -{ - assert(mask != 0); - unsigned __int64 bit = 1ULL << ((sizeof(unsigned __int64) * 8) - 1); // start looking at the top - while ((bit & mask) == 0) - { - bit >>= 1; - } - return bit; -} - -#if 0 -// TODO-ARM64-Cleanup: These should probably be the implementation, when intrin.h is updated for ARM64 inline unsigned int genFindHighestBit(unsigned int mask) { assert(mask != 0); - unsigned int index; + unsigned long index; _BitScanReverse(&index, mask); return 1L << index; } @@ -142,11 +115,10 @@ inline unsigned __int64 genFindHighestBit(unsigned __int64 mask) { assert(mask != 0); - unsigned int index; + unsigned long index; _BitScanReverse64(&index, mask); return 1LL << index; } -#endif // 0 /***************************************************************************** * @@ -222,18 +194,7 @@ inline unsigned uhi32(unsigned __int64 value) inline unsigned genLog2(unsigned __int64 value) { - unsigned lo32 = ulo32(value); - unsigned hi32 = uhi32(value); - - if (lo32 != 0) - { - assert(hi32 == 0); - return genLog2(lo32); - } - else - { - return genLog2(hi32) + 32; - } + return BitPosition(value); } /***************************************************************************** From 74088d2a8562e9aef3cbb532e80278d12270c0c4 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 13 May 2022 15:04:02 -0700 Subject: [PATCH 2/6] Use correct macro --- src/coreclr/jit/compiler.hpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index b1a2b847a07dea..24eedbf65511fb 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -102,21 +102,19 @@ inline T genFindLowestBit(T value) * * Return the highest bit that is set (that is, a mask that includes just the highest bit). */ -inline -unsigned int genFindHighestBit(unsigned int mask) +inline unsigned int genFindHighestBit(unsigned int mask) { assert(mask != 0); unsigned long index; - _BitScanReverse(&index, mask); + BitScanReverse(&index, mask); return 1L << index; } -inline -unsigned __int64 genFindHighestBit(unsigned __int64 mask) +inline unsigned __int64 genFindHighestBit(unsigned __int64 mask) { assert(mask != 0); unsigned long index; - _BitScanReverse64(&index, mask); + BitScanReverse64(&index, mask); return 1LL << index; } From 7433acb6f8c9af8bf34f822ae7c8a91e82ddb619 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 13 May 2022 17:31:25 -0700 Subject: [PATCH 3/6] Fix non-msvc build --- src/coreclr/jit/compiler.hpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 24eedbf65511fb..8bd5eb52085fb9 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -105,7 +105,11 @@ inline T genFindLowestBit(T value) inline unsigned int genFindHighestBit(unsigned int mask) { assert(mask != 0); +#if defined(_MSC_VER) unsigned long index; +#else + unsigned int index; +#endif BitScanReverse(&index, mask); return 1L << index; } @@ -113,7 +117,11 @@ inline unsigned int genFindHighestBit(unsigned int mask) inline unsigned __int64 genFindHighestBit(unsigned __int64 mask) { assert(mask != 0); +#if defined(_MSC_VER) unsigned long index; +#else + unsigned int index; +#endif BitScanReverse64(&index, mask); return 1LL << index; } From 9e4399c3ac8a0dfcb414a8034e0c2bab2e75b4b5 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 13 May 2022 17:34:36 -0700 Subject: [PATCH 4/6] fix 32-bit build --- src/coreclr/jit/compiler.hpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 8bd5eb52085fb9..d1662fc6d2c184 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -200,7 +200,22 @@ inline unsigned uhi32(unsigned __int64 value) inline unsigned genLog2(unsigned __int64 value) { +#ifdef TARGET_64BIT return BitPosition(value); +#else // TARGET_32BIT + unsigned lo32 = ulo32(value); + unsigned hi32 = uhi32(value); + + if (lo32 != 0) + { + assert(hi32 == 0); + return genLog2(lo32); + } + else + { + return genLog2(hi32) + 32; + } +#endif } /***************************************************************************** From 400dd558e23633c74779f6c627afdb4856521a93 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 13 May 2022 23:10:21 -0700 Subject: [PATCH 5/6] just do for TARGET_64BIT --- src/coreclr/inc/bitposition.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/inc/bitposition.h b/src/coreclr/inc/bitposition.h index 18af056a500c83..f03e6d6cb0852b 100644 --- a/src/coreclr/inc/bitposition.h +++ b/src/coreclr/inc/bitposition.h @@ -23,6 +23,7 @@ unsigned BitPosition(unsigned value) } +#ifdef TARGET_64BIT //------------------------------------------------------------------------ // BitPosition: Return the position of the single bit that is set in 'value'. // @@ -40,5 +41,6 @@ unsigned BitPosition(unsigned __int64 value) BitScanForward64(&index, value); return index; } +#endif // TARGET_64BIT #endif From 3522579ba2d3d85a5007d112facc4adf8620250c Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 16 May 2022 12:12:31 -0700 Subject: [PATCH 6/6] Review comments --- src/coreclr/inc/bitposition.h | 6 ++++-- src/coreclr/jit/compiler.hpp | 29 ++++++++++++++++++++++------- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/src/coreclr/inc/bitposition.h b/src/coreclr/inc/bitposition.h index f03e6d6cb0852b..aacc50ce15e017 100644 --- a/src/coreclr/inc/bitposition.h +++ b/src/coreclr/inc/bitposition.h @@ -12,6 +12,7 @@ // // Notes: // 'value' must have exactly one bit set. +// It performs the "TrailingZeroCount" operation using intrinsics. // inline unsigned BitPosition(unsigned value) @@ -23,7 +24,7 @@ unsigned BitPosition(unsigned value) } -#ifdef TARGET_64BIT +#ifdef HOST_64BIT //------------------------------------------------------------------------ // BitPosition: Return the position of the single bit that is set in 'value'. // @@ -32,6 +33,7 @@ unsigned BitPosition(unsigned value) // // Notes: // 'value' must have exactly one bit set. +// It performs the "TrailingZeroCount" operation using intrinsics. // inline unsigned BitPosition(unsigned __int64 value) @@ -41,6 +43,6 @@ unsigned BitPosition(unsigned __int64 value) BitScanForward64(&index, value); return index; } -#endif // TARGET_64BIT +#endif // HOST_64BIT #endif diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index d1662fc6d2c184..db0d382131ec7b 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -97,11 +97,16 @@ inline T genFindLowestBit(T value) return (value & (0 - value)); } -/*****************************************************************************/ -/***************************************************************************** - * - * Return the highest bit that is set (that is, a mask that includes just the highest bit). - */ +//------------------------------------------------------------------------ +// genFindHighestBit: Return the highest bit that is set (that is, a mask that includes just the +// highest bit). +// +// Return Value: +// The highest position (0 is LSB) of bit that is set in the 'value'. +// +// Note: +// It performs the "LeadingZeroCount " operation using intrinsics and then mask out everything +// but the highest bit. inline unsigned int genFindHighestBit(unsigned int mask) { assert(mask != 0); @@ -114,6 +119,16 @@ inline unsigned int genFindHighestBit(unsigned int mask) return 1L << index; } +//------------------------------------------------------------------------ +// genFindHighestBit: Return the highest bit that is set (that is, a mask that includes just the +// highest bit). +// +// Return Value: +// The highest position (0 is LSB) of bit that is set in the 'value'. +// +// Note: +// It performs the "LeadingZeroCount " operation using intrinsics and then mask out everything +// but the highest bit. inline unsigned __int64 genFindHighestBit(unsigned __int64 mask) { assert(mask != 0); @@ -200,9 +215,9 @@ inline unsigned uhi32(unsigned __int64 value) inline unsigned genLog2(unsigned __int64 value) { -#ifdef TARGET_64BIT +#ifdef HOST_64BIT return BitPosition(value); -#else // TARGET_32BIT +#else // HOST_32BIT unsigned lo32 = ulo32(value); unsigned hi32 = uhi32(value);