|
34 | 34 | #ifdef TRY_POPCNT_FAST |
35 | 35 |
|
36 | 36 | /* |
37 | | - * Returns true if the CPU supports the instructions required for the AVX-512 |
38 | | - * pg_popcount() implementation. |
| 37 | + * Does CPUID say there's support for XSAVE instructions? |
39 | 38 | */ |
40 | | -bool |
41 | | -pg_popcount_avx512_available(void) |
| 39 | +static inline bool |
| 40 | +xsave_available(void) |
42 | 41 | { |
43 | 42 | unsigned int exx[4] = {0, 0, 0, 0}; |
44 | 43 |
|
45 | | - /* Does CPUID say there's support for AVX-512 popcount instructions? */ |
46 | | -#if defined(HAVE__GET_CPUID_COUNT) |
47 | | - __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]); |
48 | | -#elif defined(HAVE__CPUIDEX) |
49 | | - __cpuidex(exx, 7, 0); |
50 | | -#else |
51 | | -#error cpuid instruction not available |
52 | | -#endif |
53 | | - if ((exx[2] & (1 << 14)) == 0) /* avx512-vpopcntdq */ |
54 | | - return false; |
55 | | - |
56 | | - /* Does CPUID say there's support for AVX-512 byte and word instructions? */ |
57 | | - memset(exx, 0, sizeof(exx)); |
58 | | -#if defined(HAVE__GET_CPUID_COUNT) |
59 | | - __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]); |
60 | | -#elif defined(HAVE__CPUIDEX) |
61 | | - __cpuidex(exx, 7, 0); |
62 | | -#else |
63 | | -#error cpuid instruction not available |
64 | | -#endif |
65 | | - if ((exx[1] & (1 << 30)) == 0) /* avx512-bw */ |
66 | | - return false; |
67 | | - |
68 | | - /* Does CPUID say there's support for XSAVE instructions? */ |
69 | | - memset(exx, 0, sizeof(exx)); |
70 | 44 | #if defined(HAVE__GET_CPUID) |
71 | 45 | __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); |
72 | 46 | #elif defined(HAVE__CPUID) |
73 | 47 | __cpuid(exx, 1); |
74 | 48 | #else |
75 | 49 | #error cpuid instruction not available |
76 | 50 | #endif |
77 | | - if ((exx[2] & (1 << 26)) == 0) /* xsave */ |
78 | | - return false; |
| 51 | + return (exx[2] & (1 << 27)) != 0; /* osxsave */ |
| 52 | +} |
79 | 53 |
|
/*
 * Ask XGETBV whether the register state needed for ZMM use is enabled.
 *
 * Returns true only when every bit of the 0xe6 mask is set in the value
 * XGETBV reports for index 0.  When the build lacks XSAVE intrinsics the
 * answer is always false.
 *
 * NB: The caller must have confirmed that xsave_available() returns true
 * before calling this.
 */
static inline bool
zmm_regs_available(void)
{
#ifdef HAVE_XSAVE_INTRINSICS
	/* no bit of the mask may be missing */
	return (~_xgetbv(0) & 0xe6) == 0;
#else
	return false;
#endif
}
87 | 69 |
|
| 70 | +/* |
| 71 | + * Does CPUID say there's support for AVX-512 popcount and byte-and-word |
| 72 | + * instructions? |
| 73 | + */ |
| 74 | +static inline bool |
| 75 | +avx512_popcnt_available(void) |
| 76 | +{ |
| 77 | + unsigned int exx[4] = {0, 0, 0, 0}; |
| 78 | + |
| 79 | +#if defined(HAVE__GET_CPUID_COUNT) |
| 80 | + __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]); |
| 81 | +#elif defined(HAVE__CPUIDEX) |
| 82 | + __cpuidex(exx, 7, 0); |
| 83 | +#else |
| 84 | +#error cpuid instruction not available |
| 85 | +#endif |
| 86 | + return (exx[2] & (1 << 14)) != 0 && /* avx512-vpopcntdq */ |
| 87 | + (exx[1] & (1 << 30)) != 0; /* avx512-bw */ |
| 88 | +} |
| 89 | + |
/*
 * Returns true if the CPU supports the instructions required for the AVX-512
 * pg_popcount() implementation.
 *
 * The checks run in a fixed order and stop at the first failure:
 * zmm_regs_available() is only reached once xsave_available() has returned
 * true, as that function requires.
 */
bool
pg_popcount_avx512_available(void)
{
	if (!xsave_available())
		return false;

	if (!zmm_regs_available())
		return false;

	return avx512_popcnt_available();
}
| 101 | + |
88 | 102 | #endif /* TRY_POPCNT_FAST */ |
0 commit comments