54 #if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
55 #if defined(_WIN32) && (!defined(__MINGW32__) || (!defined(__i386) && !defined(_M_IX86)))
56 #include "cpu_x86_Windows.ipp"
57 #elif defined(__GNUC__) || defined(__clang__)
58 #include "cpu_x86_Linux.ipp"
// Fallback definition: when nothing included earlier has defined
// _XCR_XFEATURE_ENABLED_MASK, define it as 0. The macro is the argument
// passed to xgetbv() in the OS-support checks further down this file.
68 #ifndef _XCR_XFEATURE_ENABLED_MASK
69 #define _XCR_XFEATURE_ENABLED_MASK 0
72 #ifndef DOXYGEN_SHOULD_SKIP_THIS
73 namespace FeatureDetector
// Writes "Yes" or "No" (chosen by `yes`) to cout, followed by endl.
// @param label  text associated with the flag.
// @param yes    flag value to render.
// NOTE(review): the statement(s) between the signature and the output below
// are not visible in this excerpt, so how `label` is emitted cannot be
// confirmed from here.
83 void cpuX86::print(
const char *label,
bool yes)
86 cout << (yes ?
"Yes" :
"No") << endl;
// Zero-fills the current object: sizeof(*this) bytes starting at `this`.
// NOTE(review): the enclosing function header is not visible in this excerpt;
// presumably this is the constructor body - confirm.
// NOTE(review): byte-wise zeroing of `this` assumes the class is trivially
// copyable - confirm against the class declaration.
94 memset(
this, 0,
sizeof(*
this));
// Determines whether AVX is usable: two bits of cpuInfo[2] must be set and
// the mask returned by xgetbv() must have bits 1 and 2 (0x6) set.
// NOTE(review): the declaration/filling of cpuInfo and the final return are
// not visible in this excerpt; presumably cpuInfo holds CPUID leaf 1 output
// and avxSupported is the return value - confirm.
97 bool cpuX86::detect_OS_AVX()
// Stays false unless both checks below succeed.
102 bool avxSupported =
false;
// Register index and bit positions tested in cpuInfo.
106 const unsigned int index_2 = 2;
107 const unsigned int val_27 = 27;
108 const unsigned int val_28 = 28;
// Bit 27 of cpuInfo[2]: per the variable name, the OS uses XSAVE/XRSTOR.
110 bool osUsesXSAVE_XRSTORE = (cpuInfo[index_2] & (1U << val_27)) != 0;
// Bit 28 of cpuInfo[2]: per the variable name, the CPU supports AVX.
111 bool cpuAVXSuport = (cpuInfo[index_2] & (1U << val_28)) != 0;
// The feature mask is only queried when both flags above are set.
113 if (osUsesXSAVE_XRSTORE && cpuAVXSuport) {
114 uint64_t xcrFeatureMask = xgetbv(_XCR_XFEATURE_ENABLED_MASK);
// Both bit 1 and bit 2 must be set (presumably the XMM and YMM state bits
// of XCR0 - see the Intel SDM).
115 avxSupported = (xcrFeatureMask & 0x6U) == 0x6U;
// Determines whether AVX-512 is usable: detect_OS_AVX() must succeed first,
// then bits 1, 2, 5, 6 and 7 (0xe6) of the xgetbv() mask must all be set.
123 bool cpuX86::detect_OS_AVX512()
// NOTE(review): the body of this branch is not visible in this excerpt;
// presumably it returns false - confirm.
126 if (!detect_OS_AVX()) {
130 uint64_t xcrFeatureMask = xgetbv(_XCR_XFEATURE_ENABLED_MASK);
// 0xe6 adds bits 5-7 to the 0x6 mask used by detect_OS_AVX() (presumably the
// opmask / ZMM state bits of XCR0 - see the Intel SDM).
131 return (xcrFeatureMask & 0xe6U) == 0xe6U;
// Builds the vendor identification string by copying three 4-byte values
// out of CPUInfo into consecutive positions of `name`.
// NOTE(review): the declarations of CPUInfo/name, the call that fills
// CPUInfo, and the normal return path are not visible in this excerpt.
136 std::string cpuX86::get_vendor_string()
// Register indices and the per-register byte count used by the copies below.
143 const unsigned int index_1 = 1;
144 const unsigned int index_2 = 2;
145 const unsigned int index_3 = 3;
146 const unsigned int val_4 = 4;
// Copy order is index 1, then 3, then 2 - not numeric order. Presumably this
// is EBX, EDX, ECX of CPUID leaf 0, which spells e.g. "GenuineIntel".
147 memcpy(name + 0, &CPUInfo[index_1], val_4);
148 memcpy(name + 4, &CPUInfo[index_3], val_4);
149 memcpy(name + 8, &CPUInfo[index_2], val_4);
// Returns an empty string. NOTE(review): the condition under which this path
// is taken is not visible in this excerpt.
154 return std::string();
// Fills the OS_* and HW_* members of this object for the machine the code is
// running on, using the detect_OS_* helpers, the vendor string and a series
// of cpuid() queries.
// NOTE(review): several lines (vendor branch bodies, the declaration of
// `info`, the definition of `nIds`) are not visible in this excerpt.
161 void cpuX86::detect_host()
// Operating-system level support.
165 OS_x64 = detect_OS_x64();
166 OS_AVX = detect_OS_AVX();
167 OS_AVX512 = detect_OS_AVX512();
// Vendor classification by exact match on the vendor string.
// NOTE(review): the branch bodies are not visible; presumably they set
// Vendor_Intel / Vendor_AMD - confirm.
170 std::string vendor(get_vendor_string());
171 if (vendor ==
"GenuineIntel") {
174 else if (vendor ==
"AuthenticAMD") {
// info[0] after querying leaf 0x80000000 is kept as nExIds and used below as
// the bound for the extended-leaf check.
182 cpuid(info, 0x80000000);
183 uint32_t nExIds = info[0];
// Named constants: index_N selects an element of `info`, val_N is a bit
// position used to build the (1U << val_N) masks below.
184 const unsigned int index_1 = 1;
185 const unsigned int index_2 = 2;
186 const unsigned int index_3 = 3;
187 const unsigned int val_1 = 1;
188 const unsigned int val_3 = 3;
189 const unsigned int val_5 = 5;
190 const unsigned int val_6 = 6;
191 const unsigned int val_8 = 8;
192 const unsigned int val_9 = 9;
193 const unsigned int val_11 = 11;
194 const unsigned int val_12 = 12;
195 const unsigned int val_14 = 14;
196 const unsigned int val_16 = 16;
197 const unsigned int val_17 = 17;
198 const unsigned int val_19 = 19;
199 const unsigned int val_20 = 20;
200 const unsigned int val_21 = 21;
201 const unsigned int val_23 = 23;
202 const unsigned int val_25 = 25;
203 const unsigned int val_26 = 26;
204 const unsigned int val_27 = 27;
205 const unsigned int val_28 = 28;
206 const unsigned int val_29 = 29;
207 const unsigned int val_30 = 30;
208 const unsigned int val_31 = 31;
// Leaf 0x00000001: flags are read from info[3] and info[2]. The leaf is only
// queried when nIds reports it as available.
// NOTE(review): nIds is defined on a line not visible here; presumably it is
// info[0] from leaf 0 - confirm.
211 if (nIds >= 0x00000001) {
212 cpuid(info, 0x00000001);
213 HW_MMX = (info[index_3] & (1U << val_23)) != 0U;
214 HW_SSE = (info[index_3] & (1U << val_25)) != 0U;
215 HW_SSE2 = (info[index_3] & (1U << val_26)) != 0U;
216 HW_SSE3 = (info[index_2] & (1U << 0)) != 0U;
218 HW_SSSE3 = (info[index_2] & (1U << val_9)) != 0U;
219 HW_SSE41 = (info[index_2] & (1U << val_19)) != 0U;
220 HW_SSE42 = (info[index_2] & (1U << val_20)) != 0U;
221 HW_AES = (info[index_2] & (1U << val_25)) != 0U;
223 HW_AVX = (info[index_2] & (1U << val_28)) != 0U;
224 HW_FMA3 = (info[index_2] & (1U << val_12)) != 0U;
226 HW_RDRAND = (info[index_2] & (1U << val_30)) != 0U;
// Leaf 0x00000007: flags are read from info[1], except PREFETCHWT1 and
// AVX512_VBMI which come from info[2].
228 if (nIds >= 0x00000007) {
229 cpuid(info, 0x00000007);
230 HW_AVX2 = (info[index_1] & (1U << val_5)) != 0U;
232 HW_BMI1 = (info[index_1] & (1U << val_3)) != 0U;
233 HW_BMI2 = (info[index_1] & (1U << val_8)) != 0U;
234 HW_ADX = (info[index_1] & (1U << val_19)) != 0U;
235 HW_MPX = (info[index_1] & (1U << val_14)) != 0U;
236 HW_SHA = (info[index_1] & (1U << val_29)) != 0U;
237 HW_PREFETCHWT1 = (info[index_2] & (1U << 0)) != 0U;
239 HW_AVX512_F = (info[index_1] & (1U << val_16)) != 0U;
240 HW_AVX512_CD = (info[index_1] & (1U << val_28)) != 0U;
241 HW_AVX512_PF = (info[index_1] & (1U << val_26)) != 0U;
242 HW_AVX512_ER = (info[index_1] & (1U << val_27)) != 0U;
243 HW_AVX512_VL = (info[index_1] & (1U << val_31)) != 0U;
244 HW_AVX512_BW = (info[index_1] & (1U << val_30)) != 0U;
245 HW_AVX512_DQ = (info[index_1] & (1U << val_17)) != 0U;
246 HW_AVX512_IFMA = (info[index_1] & (1U << val_21)) != 0U;
247 HW_AVX512_VBMI = (info[index_2] & (1U << val_1)) != 0U;
// Extended leaf 0x80000001: queried only when nExIds reports it as
// available; HW_x64 comes from info[3], the rest from info[2].
249 if (nExIds >= 0x80000001) {
250 cpuid(info, 0x80000001);
251 HW_x64 = (info[index_3] & (1U << val_29)) != 0U;
252 HW_ABM = (info[index_2] & (1U << val_5)) != 0U;
253 HW_SSE4a = (info[index_2] & (1U << val_6)) != 0U;
254 HW_FMA4 = (info[index_2] & (1U << val_16)) != 0U;
255 HW_XOP = (info[index_2] & (1U << val_11)) != 0U;
// Writes a report of every stored flag to cout, grouped under
// headings. Each flag line goes through print(label, flag); the object itself
// is not modified (const member).
259 void cpuX86::print()
const
// Vendor flags.
261 cout <<
"CPU Vendor:" << endl;
262 print(
" AMD = ", Vendor_AMD);
263 print(
" Intel = ", Vendor_Intel);
// Operating-system support flags.
266 cout <<
"OS Features:" << endl;
268 print(
" 64-bit = ", OS_x64);
270 print(
" OS AVX = ", OS_AVX);
271 print(
" OS AVX512 = ", OS_AVX512);
// General (non-SIMD) hardware flags.
274 cout <<
"Hardware Features:" << endl;
275 print(
" MMX = ", HW_MMX);
276 print(
" x64 = ", HW_x64);
277 print(
" ABM = ", HW_ABM);
278 print(
" RDRAND = ", HW_RDRAND);
279 print(
" BMI1 = ", HW_BMI1);
280 print(
" BMI2 = ", HW_BMI2);
281 print(
" ADX = ", HW_ADX);
282 print(
" MPX = ", HW_MPX);
283 print(
" PREFETCHWT1 = ", HW_PREFETCHWT1);
// Flags listed under the 128-bit SIMD heading.
286 cout <<
"SIMD: 128-bit" << endl;
287 print(
" SSE = ", HW_SSE);
288 print(
" SSE2 = ", HW_SSE2);
289 print(
" SSE3 = ", HW_SSE3);
290 print(
" SSSE3 = ", HW_SSSE3);
291 print(
" SSE4a = ", HW_SSE4a);
292 print(
" SSE4.1 = ", HW_SSE41);
293 print(
" SSE4.2 = ", HW_SSE42);
294 print(
" AES-NI = ", HW_AES);
295 print(
" SHA = ", HW_SHA);
// Flags listed under the 256-bit SIMD heading.
298 cout <<
"SIMD: 256-bit" << endl;
299 print(
" AVX = ", HW_AVX);
300 print(
" XOP = ", HW_XOP);
301 print(
" FMA3 = ", HW_FMA3);
302 print(
" FMA4 = ", HW_FMA4);
303 print(
" AVX2 = ", HW_AVX2);
// Flags listed under the 512-bit SIMD heading.
306 cout <<
"SIMD: 512-bit" << endl;
307 print(
" AVX512-F = ", HW_AVX512_F);
308 print(
" AVX512-CD = ", HW_AVX512_CD);
309 print(
" AVX512-PF = ", HW_AVX512_PF);
310 print(
" AVX512-ER = ", HW_AVX512_ER);
311 print(
" AVX512-VL = ", HW_AVX512_VL);
312 print(
" AVX512-BW = ", HW_AVX512_BW);
313 print(
" AVX512-DQ = ", HW_AVX512_DQ);
314 print(
" AVX512-IFMA = ", HW_AVX512_IFMA);
315 print(
" AVX512-VBMI = ", HW_AVX512_VBMI);
// Summary: an instruction set is reported as safe only when both the
// hardware flag and the matching OS flag are set.
318 cout <<
"Summary:" << endl;
319 print(
" Safe to use AVX: ", HW_AVX && OS_AVX);
320 print(
" Safe to use AVX512: ", HW_AVX512_F && OS_AVX512);