// Source: github.com/arthurnavah/cpuid/v2@v2.0.14/cpuid.go

// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.

// Package cpuid provides information about the CPU running the current program.
//
// CPU features are detected on startup, and kept for fast access through the life of the application.
// Currently x86 / x64 (AMD64) as well as arm64 is supported.
//
// You can access the CPU information by accessing the shared CPU variable of the cpuid library.
//
// Package home: https://github.com/klauspost/cpuid
package cpuid

import (
	"flag"
	"fmt"
	"math"
	"os"
	"runtime"
	"strings"
)

// AMD reference: https://www.amd.com/system/files/TechDocs/25481.pdf
// and Processor Programming Reference (PPR)

// Vendor is a representation of a CPU vendor.
type Vendor int

// Known CPU vendors. VendorUnknown is the zero value. The values are
// assigned by iota, so the order of this list defines the numeric IDs;
// lastVendor is an unexported sentinel that follows the last real vendor.
const (
	VendorUnknown Vendor = iota
	Intel
	AMD
	VIA
	Transmeta
	NSC
	KVM  // Kernel-based Virtual Machine
	MSVM // Microsoft Hyper-V or Windows Virtual PC
	VMware
	XenHVM
	Bhyve
	Hygon
	SiS
	RDC

	Ampere
	ARM
	Broadcom
	Cavium
	DEC
	Fujitsu
	Infineon
	Motorola
	NVIDIA
	AMCC
	Qualcomm
	Marvell

	lastVendor
)

//go:generate stringer -type=FeatureID,Vendor

// FeatureID is the ID of a specific cpu feature.
type FeatureID int

// Feature identifiers. The numeric values are assigned by iota, so the order
// of this list must not change without regenerating the stringer output.
//
// Note that UNKNOWN occupies the first spec of this const block, so iota is
// already 1 on the ADX line; firstID (UNKNOWN + 1 == 0) is therefore one
// below ADX and is not assigned to any named feature.
const (
	// Keep index -1 as unknown
	UNKNOWN = -1

	// Add features
	ADX                FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	AESNI                               // Advanced Encryption Standard New Instructions
	AMD3DNOW                            // AMD 3DNOW
	AMD3DNOWEXT                         // AMD 3DNowExt
	AMXBF16                             // Tile computational operations on BFLOAT16 numbers
	AMXINT8                             // Tile computational operations on 8-bit integers
	AMXTILE                             // Tile architecture
	AVX                                 // AVX functions
	AVX2                                // AVX2 functions
	AVX512BF16                          // AVX-512 BFLOAT16 Instructions
	AVX512BITALG                        // AVX-512 Bit Algorithms
	AVX512BW                            // AVX-512 Byte and Word Instructions
	AVX512CD                            // AVX-512 Conflict Detection Instructions
	AVX512DQ                            // AVX-512 Doubleword and Quadword Instructions
	AVX512ER                            // AVX-512 Exponential and Reciprocal Instructions
	AVX512F                             // AVX-512 Foundation
	AVX512FP16                          // AVX-512 FP16 Instructions
	AVX512IFMA                          // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF                            // AVX-512 Prefetch Instructions
	AVX512VBMI                          // AVX-512 Vector Bit Manipulation Instructions
	AVX512VBMI2                         // AVX-512 Vector Bit Manipulation Instructions, Version 2
	AVX512VL                            // AVX-512 Vector Length Extensions
	AVX512VNNI                          // AVX-512 Vector Neural Network Instructions
	AVX512VP2INTERSECT                  // AVX-512 Intersect for D/Q
	AVX512VPOPCNTDQ                     // AVX-512 Vector Population Count Doubleword and Quadword
	AVXSLOW                             // Indicates the CPU performs 2 128 bit operations instead of one.
	BMI1                                // Bit Manipulation Instruction Set 1
	BMI2                                // Bit Manipulation Instruction Set 2
	CETIBT                              // Intel CET Indirect Branch Tracking
	CETSS                               // Intel CET Shadow Stack
	CLDEMOTE                            // Cache Line Demote
	CLMUL                               // Carry-less Multiplication
	CLZERO                              // CLZERO instruction supported
	CMOV                                // i686 CMOV
	CMPXCHG8                            // CMPXCHG8 instruction
	CPBOOST                             // Core Performance Boost
	CX16                                // CMPXCHG16B Instruction
	ENQCMD                              // Enqueue Command
	ERMS                                // Enhanced REP MOVSB/STOSB
	F16C                                // Half-precision floating-point conversion
	FMA3                                // Intel FMA 3. Does not imply AVX.
	FMA4                                // Bulldozer FMA4 functions
	FXSR                                // FXSAVE, FXRESTOR instructions, CR4 bit 9
	FXSROPT                             // FXSAVE/FXRSTOR optimizations
	GFNI                                // Galois Field New Instructions
	HLE                                 // Hardware Lock Elision
	HTT                                 // Hyperthreading (enabled)
	HWA                                 // Hardware assert supported. Indicates support for MSRC001_10
	HYPERVISOR                          // This bit has been reserved by Intel & AMD for use by hypervisors
	IBPB                                // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
	IBS                                 // Instruction Based Sampling (AMD)
	IBSBRNTRGT                          // Instruction Based Sampling Feature (AMD)
	IBSFETCHSAM                         // Instruction Based Sampling Feature (AMD)
	IBSFFV                              // Instruction Based Sampling Feature (AMD)
	IBSOPCNT                            // Instruction Based Sampling Feature (AMD)
	IBSOPCNTEXT                         // Instruction Based Sampling Feature (AMD)
	IBSOPSAM                            // Instruction Based Sampling Feature (AMD)
	IBSRDWROPCNT                        // Instruction Based Sampling Feature (AMD)
	IBSRIPINVALIDCHK                    // Instruction Based Sampling Feature (AMD)
	INT_WBINVD                          // WBINVD/WBNOINVD are interruptible.
	INVLPGB                             // INVLPGB and TLBSYNC instruction supported
	LAHF                                // LAHF/SAHF in long mode
	LZCNT                               // LZCNT instruction
	MCAOVERFLOW                         // MCA overflow recovery support.
	MCOMMIT                             // MCOMMIT instruction supported
	MMX                                 // standard MMX
	MMXEXT                              // SSE integer functions or AMD MMX ext
	MOVBE                               // MOVBE instruction (big-endian)
	MOVDIR64B                           // Move 64 Bytes as Direct Store
	MOVDIRI                             // Move Doubleword as Direct Store
	MPX                                 // Intel MPX (Memory Protection Extensions)
	MSRIRC                              // Instruction Retired Counter MSR available
	NX                                  // NX (No-Execute) bit
	OSXSAVE                             // XSAVE enabled by OS
	POPCNT                              // POPCNT instruction
	RDPRU                               // RDPRU instruction supported
	RDRAND                              // RDRAND instruction is available
	RDSEED                              // RDSEED instruction is available
	RDTSCP                              // RDTSCP Instruction
	RTM                                 // Restricted Transactional Memory
	RTM_ALWAYS_ABORT                    // Indicates that the loaded microcode is forcing RTM abort.
	SCE                                 // SYSENTER and SYSEXIT instructions
	SERIALIZE                           // Serialize Instruction Execution
	SGX                                 // Software Guard Extensions
	SGXLC                               // Software Guard Extensions Launch Control
	SHA                                 // Intel SHA Extensions
	SSE                                 // SSE functions
	SSE2                                // P4 SSE functions
	SSE3                                // Prescott SSE3 functions
	SSE4                                // Penryn SSE4.1 functions
	SSE42                               // Nehalem SSE4.2 functions
	SSE4A                               // AMD Barcelona microarchitecture SSE4a instructions
	SSSE3                               // Conroe SSSE3 functions
	STIBP                               // Single Thread Indirect Branch Predictors
	SUCCOR                              // Software uncorrectable error containment and recovery capability.
	TBM                                 // AMD Trailing Bit Manipulation
	TSXLDTRK                            // Intel TSX Suspend Load Address Tracking
	VAES                                // Vector AES
	VMX                                 // Virtual Machine Extensions
	VPCLMULQDQ                          // Carry-Less Multiplication Quadword
	WAITPKG                             // TPAUSE, UMONITOR, UMWAIT
	WBNOINVD                            // Write Back and Do Not Invalidate Cache
	X87                                 // FPU
	XOP                                 // Bulldozer XOP functions
	XSAVE                               // XSAVE, XRESTOR, XSETBV, XGETBV

	// ARM features:
	AESARM   // AES instructions
	ARMCPUID // Some CPU ID registers readable at user-level
	ASIMD    // Advanced SIMD
	ASIMDDP  // SIMD Dot Product
	ASIMDHP  // Advanced SIMD half-precision floating point
	ASIMDRDM // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
	ATOMICS  // Large System Extensions (LSE)
	CRC32    // CRC32/CRC32C instructions
	DCPOP    // Data cache clean to Point of Persistence (DC CVAP)
	EVTSTRM  // Generic timer
	FCMA     // Floating point complex number addition and multiplication
	FP       // Single-precision and double-precision floating point
	FPHP     // Half-precision floating point
	GPA      // Generic Pointer Authentication
	JSCVT    // Javascript-style double->int convert (FJCVTZS)
	LRCPC    // Weaker release consistency (LDAPR, etc)
	PMULL    // Polynomial Multiply instructions (PMULL/PMULL2)
	SHA1     // SHA-1 instructions (SHA1C, etc)
	SHA2     // SHA-2 instructions (SHA256H, etc)
	SHA3     // SHA-3 instructions (EOR3, RAXI, XAR, BCAX)
	SHA512   // SHA512 instructions
	SM3      // SM3 instructions
	SM4      // SM4 instructions
	SVE      // Scalable Vector Extension

	// Keep it last. It automatically defines the size of []flagSet
	lastID

	// firstID is the lower bound used when iterating over feature IDs.
	firstID FeatureID = UNKNOWN + 1
)

// CPUInfo contains information about the detected system CPU.
type CPUInfo struct {
	BrandName      string  // Brand name reported by the CPU
	VendorID       Vendor  // Comparable CPU vendor ID
	VendorString   string  // Raw vendor string.
	featureSet     flagSet // Features of the CPU
	PhysicalCores  int     // Number of physical processor cores in your CPU. Will be 0 if undetectable.
	ThreadsPerCore int     // Number of threads per physical core. Will be 1 if undetectable.
	LogicalCores   int     // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
	Family         int     // CPU family number
	Model          int     // CPU model number
	CacheLine      int     // Cache line size in bytes. Will be 0 if undetectable.
	Hz             int64   // Clock speed, if known, 0 otherwise. Will attempt to contain base clock speed.
	BoostFreq      int64   // Max clock speed, if known, 0 otherwise
	Cache          struct {
		L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
		L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
		L2  int // L2 Cache (per core or shared). Will be -1 if undetected
		L3  int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected
	}
	SGX       SGXSupport
	maxFunc   uint32
	maxExFunc uint32
}

// Low-level instruction hooks. They are declared without a value here (nil);
// presumably initCPU or platform-specific files assign them — TODO confirm,
// the assignment is not visible in this file.
var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
var xgetbv func(index uint32) (eax, edx uint32)
var rdtscpAsm func() (eax, ebx, ecx, edx uint32)

// darwinHasAVX512 defaults to reporting false; it is consulted by the
// AVX-512 detection when runtime.GOOS is "darwin".
var darwinHasAVX512 = func() bool { return false }

// CPU contains information about the CPU as detected on startup,
// or when Detect last was called.
//
// Use this as the primary entry point to your data.
var CPU CPUInfo

// init runs initCPU and then performs the initial detection into CPU.
func init() {
	initCPU()
	Detect()
}

// Detect will re-detect current CPU info.
// This will replace the content of the exported CPU variable.
//
// Unless you expect the CPU to change while you are running your program
// you should not need to call this function.
// If you call this, you must ensure that no other goroutine is accessing the
// exported CPU variable.
257 func Detect() { 258 // Set defaults 259 CPU.ThreadsPerCore = 1 260 CPU.Cache.L1I = -1 261 CPU.Cache.L1D = -1 262 CPU.Cache.L2 = -1 263 CPU.Cache.L3 = -1 264 safe := true 265 if detectArmFlag != nil { 266 safe = !*detectArmFlag 267 } 268 addInfo(&CPU, safe) 269 if displayFeats != nil && *displayFeats { 270 fmt.Println("cpu features:", strings.Join(CPU.FeatureSet(), ",")) 271 // Exit with non-zero so tests will print value. 272 os.Exit(1) 273 } 274 if disableFlag != nil { 275 s := strings.Split(*disableFlag, ",") 276 for _, feat := range s { 277 feat := ParseFeature(strings.TrimSpace(feat)) 278 if feat != UNKNOWN { 279 CPU.featureSet.unset(feat) 280 } 281 } 282 } 283 } 284 285 // DetectARM will detect ARM64 features. 286 // This is NOT done automatically since it can potentially crash 287 // if the OS does not handle the command. 288 // If in the future this can be done safely this function may not 289 // do anything. 290 func DetectARM() { 291 addInfo(&CPU, false) 292 } 293 294 var detectArmFlag *bool 295 var displayFeats *bool 296 var disableFlag *string 297 298 // Flags will enable flags. 299 // This must be called *before* flag.Parse AND 300 // Detect must be called after the flags have been parsed. 301 // Note that this means that any detection used in init() functions 302 // will not contain these flags. 303 func Flags() { 304 disableFlag = flag.String("cpu.disable", "", "disable cpu features; comma separated list") 305 displayFeats = flag.Bool("cpu.features", false, "lists cpu features and exits") 306 detectArmFlag = flag.Bool("cpu.arm", false, "allow ARM features to be detected; can potentially crash") 307 } 308 309 // Supports returns whether the CPU supports all of the requested features. 310 func (c CPUInfo) Supports(ids ...FeatureID) bool { 311 for _, id := range ids { 312 if !c.featureSet.inSet(id) { 313 return false 314 } 315 } 316 return true 317 } 318 319 // Has allows for checking a single feature. 320 // Should be inlined by the compiler. 
321 func (c CPUInfo) Has(id FeatureID) bool { 322 return c.featureSet.inSet(id) 323 } 324 325 // https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels 326 var level1Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SCE, SSE, SSE2) 327 var level2Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SCE, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3) 328 var level3Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SCE, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE) 329 var level4Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SCE, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL) 330 331 // X64Level returns the microarchitecture level detected on the CPU. 332 // If features are lacking or non x64 mode, 0 is returned. 333 // See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels 334 func (c CPUInfo) X64Level() int { 335 if c.featureSet.hasSet(level4Features) { 336 return 4 337 } 338 if c.featureSet.hasSet(level3Features) { 339 return 3 340 } 341 if c.featureSet.hasSet(level2Features) { 342 return 2 343 } 344 if c.featureSet.hasSet(level1Features) { 345 return 1 346 } 347 return 0 348 } 349 350 // Disable will disable one or several features. 351 func (c *CPUInfo) Disable(ids ...FeatureID) bool { 352 for _, id := range ids { 353 c.featureSet.unset(id) 354 } 355 return true 356 } 357 358 // Enable will disable one or several features even if they were undetected. 359 // This is of course not recommended for obvious reasons. 
360 func (c *CPUInfo) Enable(ids ...FeatureID) bool { 361 for _, id := range ids { 362 c.featureSet.set(id) 363 } 364 return true 365 } 366 367 // IsVendor returns true if vendor is recognized as Intel 368 func (c CPUInfo) IsVendor(v Vendor) bool { 369 return c.VendorID == v 370 } 371 372 func (c CPUInfo) FeatureSet() []string { 373 s := make([]string, 0) 374 s = append(s, c.featureSet.Strings()...) 375 return s 376 } 377 378 // RTCounter returns the 64-bit time-stamp counter 379 // Uses the RDTSCP instruction. The value 0 is returned 380 // if the CPU does not support the instruction. 381 func (c CPUInfo) RTCounter() uint64 { 382 if !c.Supports(RDTSCP) { 383 return 0 384 } 385 a, _, _, d := rdtscpAsm() 386 return uint64(a) | (uint64(d) << 32) 387 } 388 389 // Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP. 390 // This variable is OS dependent, but on Linux contains information 391 // about the current cpu/core the code is running on. 392 // If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned. 393 func (c CPUInfo) Ia32TscAux() uint32 { 394 if !c.Supports(RDTSCP) { 395 return 0 396 } 397 _, _, ecx, _ := rdtscpAsm() 398 return ecx 399 } 400 401 // LogicalCPU will return the Logical CPU the code is currently executing on. 402 // This is likely to change when the OS re-schedules the running thread 403 // to another CPU. 404 // If the current core cannot be detected, -1 will be returned. 405 func (c CPUInfo) LogicalCPU() int { 406 if c.maxFunc < 1 { 407 return -1 408 } 409 _, ebx, _, _ := cpuid(1) 410 return int(ebx >> 24) 411 } 412 413 // frequencies tries to compute the clock speed of the CPU. If leaf 15 is 414 // supported, use it, otherwise parse the brand string. Yes, really. 
415 func (c *CPUInfo) frequencies() { 416 c.Hz, c.BoostFreq = 0, 0 417 mfi := maxFunctionID() 418 if mfi >= 0x15 { 419 eax, ebx, ecx, _ := cpuid(0x15) 420 if eax != 0 && ebx != 0 && ecx != 0 { 421 c.Hz = (int64(ecx) * int64(ebx)) / int64(eax) 422 } 423 } 424 if mfi >= 0x16 { 425 a, b, _, _ := cpuid(0x16) 426 // Base... 427 if a&0xffff > 0 { 428 c.Hz = int64(a&0xffff) * 1_000_000 429 } 430 // Boost... 431 if b&0xffff > 0 { 432 c.BoostFreq = int64(b&0xffff) * 1_000_000 433 } 434 } 435 if c.Hz > 0 { 436 return 437 } 438 439 // computeHz determines the official rated speed of a CPU from its brand 440 // string. This insanity is *actually the official documented way to do 441 // this according to Intel*, prior to leaf 0x15 existing. The official 442 // documentation only shows this working for exactly `x.xx` or `xxxx` 443 // cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other 444 // sizes. 445 model := c.BrandName 446 hz := strings.LastIndex(model, "Hz") 447 if hz < 3 { 448 return 449 } 450 var multiplier int64 451 switch model[hz-1] { 452 case 'M': 453 multiplier = 1000 * 1000 454 case 'G': 455 multiplier = 1000 * 1000 * 1000 456 case 'T': 457 multiplier = 1000 * 1000 * 1000 * 1000 458 } 459 if multiplier == 0 { 460 return 461 } 462 freq := int64(0) 463 divisor := int64(0) 464 decimalShift := int64(1) 465 var i int 466 for i = hz - 2; i >= 0 && model[i] != ' '; i-- { 467 if model[i] >= '0' && model[i] <= '9' { 468 freq += int64(model[i]-'0') * decimalShift 469 decimalShift *= 10 470 } else if model[i] == '.' { 471 if divisor != 0 { 472 return 473 } 474 divisor = decimalShift 475 } else { 476 return 477 } 478 } 479 // we didn't find a space 480 if i < 0 { 481 return 482 } 483 if divisor != 0 { 484 c.Hz = (freq * multiplier) / divisor 485 return 486 } 487 c.Hz = freq * multiplier 488 } 489 490 // VM Will return true if the cpu id indicates we are in 491 // a virtual machine. 
492 func (c CPUInfo) VM() bool { 493 return CPU.featureSet.inSet(HYPERVISOR) 494 } 495 496 // flags contains detected cpu features and characteristics 497 type flags uint64 498 499 // log2(bits_in_uint64) 500 const flagBitsLog2 = 6 501 const flagBits = 1 << flagBitsLog2 502 const flagMask = flagBits - 1 503 504 // flagSet contains detected cpu features and characteristics in an array of flags 505 type flagSet [(lastID + flagMask) / flagBits]flags 506 507 func (s flagSet) inSet(feat FeatureID) bool { 508 return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0 509 } 510 511 func (s *flagSet) set(feat FeatureID) { 512 s[feat>>flagBitsLog2] |= 1 << (feat & flagMask) 513 } 514 515 // setIf will set a feature if boolean is true. 516 func (s *flagSet) setIf(cond bool, features ...FeatureID) { 517 if cond { 518 for _, offset := range features { 519 s[offset>>flagBitsLog2] |= 1 << (offset & flagMask) 520 } 521 } 522 } 523 524 func (s *flagSet) unset(offset FeatureID) { 525 bit := flags(1 << (offset & flagMask)) 526 s[offset>>flagBitsLog2] = s[offset>>flagBitsLog2] & ^bit 527 } 528 529 // or with another flagset. 530 func (s *flagSet) or(other flagSet) { 531 for i, v := range other[:] { 532 s[i] |= v 533 } 534 } 535 536 // hasSet returns whether all features are present. 537 func (s flagSet) hasSet(other flagSet) bool { 538 for i, v := range other[:] { 539 if s[i]&v != v { 540 return false 541 } 542 } 543 return true 544 } 545 546 func flagSetWith(feat ...FeatureID) flagSet { 547 var res flagSet 548 for _, f := range feat { 549 res.set(f) 550 } 551 return res 552 } 553 554 // ParseFeature will parse the string and return the ID of the matching feature. 555 // Will return UNKNOWN if not found. 556 func ParseFeature(s string) FeatureID { 557 s = strings.ToUpper(s) 558 for i := firstID; i < lastID; i++ { 559 if i.String() == s { 560 return i 561 } 562 } 563 return UNKNOWN 564 } 565 566 // Strings returns an array of the detected features for FlagsSet. 
567 func (s flagSet) Strings() []string { 568 if len(s) == 0 { 569 return []string{""} 570 } 571 r := make([]string, 0) 572 for i := firstID; i < lastID; i++ { 573 if s.inSet(i) { 574 r = append(r, i.String()) 575 } 576 } 577 return r 578 } 579 580 func maxExtendedFunction() uint32 { 581 eax, _, _, _ := cpuid(0x80000000) 582 return eax 583 } 584 585 func maxFunctionID() uint32 { 586 a, _, _, _ := cpuid(0) 587 return a 588 } 589 590 func brandName() string { 591 if maxExtendedFunction() >= 0x80000004 { 592 v := make([]uint32, 0, 48) 593 for i := uint32(0); i < 3; i++ { 594 a, b, c, d := cpuid(0x80000002 + i) 595 v = append(v, a, b, c, d) 596 } 597 return strings.Trim(string(valAsString(v...)), " ") 598 } 599 return "unknown" 600 } 601 602 func threadsPerCore() int { 603 mfi := maxFunctionID() 604 vend, _ := vendorID() 605 606 if mfi < 0x4 || (vend != Intel && vend != AMD) { 607 return 1 608 } 609 610 if mfi < 0xb { 611 if vend != Intel { 612 return 1 613 } 614 _, b, _, d := cpuid(1) 615 if (d & (1 << 28)) != 0 { 616 // v will contain logical core count 617 v := (b >> 16) & 255 618 if v > 1 { 619 a4, _, _, _ := cpuid(4) 620 // physical cores 621 v2 := (a4 >> 26) + 1 622 if v2 > 0 { 623 return int(v) / int(v2) 624 } 625 } 626 } 627 return 1 628 } 629 _, b, _, _ := cpuidex(0xb, 0) 630 if b&0xffff == 0 { 631 if vend == AMD { 632 // Workaround for AMD returning 0, assume 2 if >= Zen 2 633 // It will be more correct than not. 634 fam, _ := familyModel() 635 _, _, _, d := cpuid(1) 636 if (d&(1<<28)) != 0 && fam >= 23 { 637 return 2 638 } 639 } 640 return 1 641 } 642 return int(b & 0xffff) 643 } 644 645 func logicalCores() int { 646 mfi := maxFunctionID() 647 v, _ := vendorID() 648 switch v { 649 case Intel: 650 // Use this on old Intel processors 651 if mfi < 0xb { 652 if mfi < 1 { 653 return 0 654 } 655 // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID) 656 // that can be assigned to logical processors in a physical package. 
657 // The value may not be the same as the number of logical processors that are present in the hardware of a physical package. 658 _, ebx, _, _ := cpuid(1) 659 logical := (ebx >> 16) & 0xff 660 return int(logical) 661 } 662 _, b, _, _ := cpuidex(0xb, 1) 663 return int(b & 0xffff) 664 case AMD, Hygon: 665 _, b, _, _ := cpuid(1) 666 return int((b >> 16) & 0xff) 667 default: 668 return 0 669 } 670 } 671 672 func familyModel() (int, int) { 673 if maxFunctionID() < 0x1 { 674 return 0, 0 675 } 676 eax, _, _, _ := cpuid(1) 677 family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff) 678 model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0) 679 return int(family), int(model) 680 } 681 682 func physicalCores() int { 683 v, _ := vendorID() 684 switch v { 685 case Intel: 686 return logicalCores() / threadsPerCore() 687 case AMD, Hygon: 688 lc := logicalCores() 689 tpc := threadsPerCore() 690 if lc > 0 && tpc > 0 { 691 return lc / tpc 692 } 693 694 // The following is inaccurate on AMD EPYC 7742 64-Core Processor 695 if maxExtendedFunction() >= 0x80000008 { 696 _, _, c, _ := cpuid(0x80000008) 697 if c&0xff > 0 { 698 return int(c&0xff) + 1 699 } 700 } 701 } 702 return 0 703 } 704 705 // Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID 706 var vendorMapping = map[string]Vendor{ 707 "AMDisbetter!": AMD, 708 "AuthenticAMD": AMD, 709 "CentaurHauls": VIA, 710 "GenuineIntel": Intel, 711 "TransmetaCPU": Transmeta, 712 "GenuineTMx86": Transmeta, 713 "Geode by NSC": NSC, 714 "VIA VIA VIA ": VIA, 715 "KVMKVMKVMKVM": KVM, 716 "Microsoft Hv": MSVM, 717 "VMwareVMware": VMware, 718 "XenVMMXenVMM": XenHVM, 719 "bhyve bhyve ": Bhyve, 720 "HygonGenuine": Hygon, 721 "Vortex86 SoC": SiS, 722 "SiS SiS SiS ": SiS, 723 "RiseRiseRise": SiS, 724 "Genuine RDC": RDC, 725 } 726 727 func vendorID() (Vendor, string) { 728 _, b, c, d := cpuid(0) 729 v := string(valAsString(b, d, c)) 730 vend, ok := vendorMapping[v] 731 if !ok { 732 return VendorUnknown, v 733 } 734 return vend, v 735 } 736 
737 func cacheLine() int { 738 if maxFunctionID() < 0x1 { 739 return 0 740 } 741 742 _, ebx, _, _ := cpuid(1) 743 cache := (ebx & 0xff00) >> 5 // cflush size 744 if cache == 0 && maxExtendedFunction() >= 0x80000006 { 745 _, _, ecx, _ := cpuid(0x80000006) 746 cache = ecx & 0xff // cacheline size 747 } 748 // TODO: Read from Cache and TLB Information 749 return int(cache) 750 } 751 752 func (c *CPUInfo) cacheSize() { 753 c.Cache.L1D = -1 754 c.Cache.L1I = -1 755 c.Cache.L2 = -1 756 c.Cache.L3 = -1 757 vendor, _ := vendorID() 758 switch vendor { 759 case Intel: 760 if maxFunctionID() < 4 { 761 return 762 } 763 c.Cache.L1I, c.Cache.L1D, c.Cache.L2, c.Cache.L3 = 0, 0, 0, 0 764 for i := uint32(0); ; i++ { 765 eax, ebx, ecx, _ := cpuidex(4, i) 766 cacheType := eax & 15 767 if cacheType == 0 { 768 break 769 } 770 cacheLevel := (eax >> 5) & 7 771 coherency := int(ebx&0xfff) + 1 772 partitions := int((ebx>>12)&0x3ff) + 1 773 associativity := int((ebx>>22)&0x3ff) + 1 774 sets := int(ecx) + 1 775 size := associativity * partitions * coherency * sets 776 switch cacheLevel { 777 case 1: 778 if cacheType == 1 { 779 // 1 = Data Cache 780 c.Cache.L1D = size 781 } else if cacheType == 2 { 782 // 2 = Instruction Cache 783 c.Cache.L1I = size 784 } else { 785 if c.Cache.L1D < 0 { 786 c.Cache.L1I = size 787 } 788 if c.Cache.L1I < 0 { 789 c.Cache.L1I = size 790 } 791 } 792 case 2: 793 c.Cache.L2 = size 794 case 3: 795 c.Cache.L3 = size 796 } 797 } 798 case AMD, Hygon: 799 // Untested. 
800 if maxExtendedFunction() < 0x80000005 { 801 return 802 } 803 _, _, ecx, edx := cpuid(0x80000005) 804 c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024) 805 c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024) 806 807 if maxExtendedFunction() < 0x80000006 { 808 return 809 } 810 _, _, ecx, _ = cpuid(0x80000006) 811 c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024) 812 813 // CPUID Fn8000_001D_EAX_x[N:0] Cache Properties 814 if maxExtendedFunction() < 0x8000001D { 815 return 816 } 817 for i := uint32(0); i < math.MaxUint32; i++ { 818 eax, ebx, ecx, _ := cpuidex(0x8000001D, i) 819 820 level := (eax >> 5) & 7 821 cacheNumSets := ecx + 1 822 cacheLineSize := 1 + (ebx & 2047) 823 cachePhysPartitions := 1 + ((ebx >> 12) & 511) 824 cacheNumWays := 1 + ((ebx >> 22) & 511) 825 826 typ := eax & 15 827 size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays) 828 if typ == 0 { 829 return 830 } 831 832 switch level { 833 case 1: 834 switch typ { 835 case 1: 836 // Data cache 837 c.Cache.L1D = size 838 case 2: 839 // Inst cache 840 c.Cache.L1I = size 841 default: 842 if c.Cache.L1D < 0 { 843 c.Cache.L1I = size 844 } 845 if c.Cache.L1I < 0 { 846 c.Cache.L1I = size 847 } 848 } 849 case 2: 850 c.Cache.L2 = size 851 case 3: 852 c.Cache.L3 = size 853 } 854 } 855 } 856 } 857 858 type SGXEPCSection struct { 859 BaseAddress uint64 860 EPCSize uint64 861 } 862 863 type SGXSupport struct { 864 Available bool 865 LaunchControl bool 866 SGX1Supported bool 867 SGX2Supported bool 868 MaxEnclaveSizeNot64 int64 869 MaxEnclaveSize64 int64 870 EPCSections []SGXEPCSection 871 } 872 873 func hasSGX(available, lc bool) (rval SGXSupport) { 874 rval.Available = available 875 876 if !available { 877 return 878 } 879 880 rval.LaunchControl = lc 881 882 a, _, _, d := cpuidex(0x12, 0) 883 rval.SGX1Supported = a&0x01 != 0 884 rval.SGX2Supported = a&0x02 != 0 885 rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF) // pow 2 886 rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2 887 rval.EPCSections = 
make([]SGXEPCSection, 0) 888 889 for subleaf := uint32(2); subleaf < 2+8; subleaf++ { 890 eax, ebx, ecx, edx := cpuidex(0x12, subleaf) 891 leafType := eax & 0xf 892 893 if leafType == 0 { 894 // Invalid subleaf, stop iterating 895 break 896 } else if leafType == 1 { 897 // EPC Section subleaf 898 baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32) 899 size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32) 900 901 section := SGXEPCSection{BaseAddress: baseAddress, EPCSize: size} 902 rval.EPCSections = append(rval.EPCSections, section) 903 } 904 } 905 906 return 907 } 908 909 func support() flagSet { 910 var fs flagSet 911 mfi := maxFunctionID() 912 vend, _ := vendorID() 913 if mfi < 0x1 { 914 return fs 915 } 916 family, model := familyModel() 917 918 _, _, c, d := cpuid(1) 919 fs.setIf((d&(1<<0)) != 0, X87) 920 fs.setIf((d&(1<<8)) != 0, CMPXCHG8) 921 fs.setIf((d&(1<<11)) != 0, SCE) 922 fs.setIf((d&(1<<15)) != 0, CMOV) 923 fs.setIf((d&(1<<22)) != 0, MMXEXT) 924 fs.setIf((d&(1<<23)) != 0, MMX) 925 fs.setIf((d&(1<<24)) != 0, FXSR) 926 fs.setIf((d&(1<<25)) != 0, FXSROPT) 927 fs.setIf((d&(1<<25)) != 0, SSE) 928 fs.setIf((d&(1<<26)) != 0, SSE2) 929 fs.setIf((c&1) != 0, SSE3) 930 fs.setIf((c&(1<<5)) != 0, VMX) 931 fs.setIf((c&0x00000200) != 0, SSSE3) 932 fs.setIf((c&0x00080000) != 0, SSE4) 933 fs.setIf((c&0x00100000) != 0, SSE42) 934 fs.setIf((c&(1<<25)) != 0, AESNI) 935 fs.setIf((c&(1<<1)) != 0, CLMUL) 936 fs.setIf(c&(1<<22) != 0, MOVBE) 937 fs.setIf(c&(1<<23) != 0, POPCNT) 938 fs.setIf(c&(1<<30) != 0, RDRAND) 939 940 // This bit has been reserved by Intel & AMD for use by hypervisors, 941 // and indicates the presence of a hypervisor. 
942 fs.setIf(c&(1<<31) != 0, HYPERVISOR) 943 fs.setIf(c&(1<<29) != 0, F16C) 944 fs.setIf(c&(1<<13) != 0, CX16) 945 946 if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 { 947 fs.setIf(threadsPerCore() > 1, HTT) 948 } 949 if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 { 950 fs.setIf(threadsPerCore() > 1, HTT) 951 } 952 fs.setIf(c&1<<26 != 0, XSAVE) 953 fs.setIf(c&1<<27 != 0, OSXSAVE) 954 // Check XGETBV/XSAVE (26), OXSAVE (27) and AVX (28) bits 955 const avxCheck = 1<<26 | 1<<27 | 1<<28 956 if c&avxCheck == avxCheck { 957 // Check for OS support 958 eax, _ := xgetbv(0) 959 if (eax & 0x6) == 0x6 { 960 fs.set(AVX) 961 switch vend { 962 case Intel: 963 // Older than Haswell. 964 fs.setIf(family == 6 && model < 60, AVXSLOW) 965 case AMD: 966 // Older than Zen 2 967 fs.setIf(family < 23 || (family == 23 && model < 49), AVXSLOW) 968 } 969 } 970 } 971 // FMA3 can be used with SSE registers, so no OS support is strictly needed. 972 // fma3 and OSXSAVE needed. 973 const fma3Check = 1<<12 | 1<<27 974 fs.setIf(c&fma3Check == fma3Check, FMA3) 975 976 // Check AVX2, AVX2 requires OS support, but BMI1/2 don't. 
977 if mfi >= 7 { 978 _, ebx, ecx, edx := cpuidex(7, 0) 979 eax1, _, _, _ := cpuidex(7, 1) 980 if fs.inSet(AVX) && (ebx&0x00000020) != 0 { 981 fs.set(AVX2) 982 } 983 // CPUID.(EAX=7, ECX=0).EBX 984 if (ebx & 0x00000008) != 0 { 985 fs.set(BMI1) 986 fs.setIf((ebx&0x00000100) != 0, BMI2) 987 } 988 fs.setIf(ebx&(1<<2) != 0, SGX) 989 fs.setIf(ebx&(1<<4) != 0, HLE) 990 fs.setIf(ebx&(1<<9) != 0, ERMS) 991 fs.setIf(ebx&(1<<11) != 0, RTM) 992 fs.setIf(ebx&(1<<14) != 0, MPX) 993 fs.setIf(ebx&(1<<18) != 0, RDSEED) 994 fs.setIf(ebx&(1<<19) != 0, ADX) 995 fs.setIf(ebx&(1<<29) != 0, SHA) 996 // CPUID.(EAX=7, ECX=0).ECX 997 fs.setIf(ecx&(1<<5) != 0, WAITPKG) 998 fs.setIf(ecx&(1<<7) != 0, CETSS) 999 fs.setIf(ecx&(1<<25) != 0, CLDEMOTE) 1000 fs.setIf(ecx&(1<<27) != 0, MOVDIRI) 1001 fs.setIf(ecx&(1<<28) != 0, MOVDIR64B) 1002 fs.setIf(ecx&(1<<29) != 0, ENQCMD) 1003 fs.setIf(ecx&(1<<30) != 0, SGXLC) 1004 // CPUID.(EAX=7, ECX=0).EDX 1005 fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT) 1006 fs.setIf(edx&(1<<14) != 0, SERIALIZE) 1007 fs.setIf(edx&(1<<16) != 0, TSXLDTRK) 1008 fs.setIf(edx&(1<<20) != 0, CETIBT) 1009 fs.setIf(edx&(1<<26) != 0, IBPB) 1010 fs.setIf(edx&(1<<27) != 0, STIBP) 1011 1012 // Only detect AVX-512 features if XGETBV is supported 1013 if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) { 1014 // Check for OS support 1015 eax, _ := xgetbv(0) 1016 1017 // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and 1018 // ZMM16-ZMM31 state are enabled by OS) 1019 /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS). 
1020 hasAVX512 := (eax>>5)&7 == 7 && (eax>>1)&3 == 3 1021 if runtime.GOOS == "darwin" { 1022 hasAVX512 = fs.inSet(AVX) && darwinHasAVX512() 1023 } 1024 if hasAVX512 { 1025 fs.setIf(ebx&(1<<16) != 0, AVX512F) 1026 fs.setIf(ebx&(1<<17) != 0, AVX512DQ) 1027 fs.setIf(ebx&(1<<21) != 0, AVX512IFMA) 1028 fs.setIf(ebx&(1<<26) != 0, AVX512PF) 1029 fs.setIf(ebx&(1<<27) != 0, AVX512ER) 1030 fs.setIf(ebx&(1<<28) != 0, AVX512CD) 1031 fs.setIf(ebx&(1<<30) != 0, AVX512BW) 1032 fs.setIf(ebx&(1<<31) != 0, AVX512VL) 1033 // ecx 1034 fs.setIf(ecx&(1<<1) != 0, AVX512VBMI) 1035 fs.setIf(ecx&(1<<6) != 0, AVX512VBMI2) 1036 fs.setIf(ecx&(1<<8) != 0, GFNI) 1037 fs.setIf(ecx&(1<<9) != 0, VAES) 1038 fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ) 1039 fs.setIf(ecx&(1<<11) != 0, AVX512VNNI) 1040 fs.setIf(ecx&(1<<12) != 0, AVX512BITALG) 1041 fs.setIf(ecx&(1<<14) != 0, AVX512VPOPCNTDQ) 1042 // edx 1043 fs.setIf(edx&(1<<8) != 0, AVX512VP2INTERSECT) 1044 fs.setIf(edx&(1<<22) != 0, AMXBF16) 1045 fs.setIf(edx&(1<<23) != 0, AVX512FP16) 1046 fs.setIf(edx&(1<<24) != 0, AMXTILE) 1047 fs.setIf(edx&(1<<25) != 0, AMXINT8) 1048 // eax1 = CPUID.(EAX=7, ECX=1).EAX 1049 fs.setIf(eax1&(1<<5) != 0, AVX512BF16) 1050 } 1051 } 1052 } 1053 1054 if maxExtendedFunction() >= 0x80000001 { 1055 _, _, c, d := cpuid(0x80000001) 1056 if (c & (1 << 5)) != 0 { 1057 fs.set(LZCNT) 1058 fs.set(POPCNT) 1059 } 1060 fs.setIf((c&(1<<0)) != 0, LAHF) 1061 fs.setIf((c&(1<<10)) != 0, IBS) 1062 fs.setIf((d&(1<<31)) != 0, AMD3DNOW) 1063 fs.setIf((d&(1<<30)) != 0, AMD3DNOWEXT) 1064 fs.setIf((d&(1<<23)) != 0, MMX) 1065 fs.setIf((d&(1<<22)) != 0, MMXEXT) 1066 fs.setIf((c&(1<<6)) != 0, SSE4A) 1067 fs.setIf(d&(1<<20) != 0, NX) 1068 fs.setIf(d&(1<<27) != 0, RDTSCP) 1069 1070 /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be 1071 * used unless the OS has AVX support. 
*/ 1072 if fs.inSet(AVX) { 1073 fs.setIf((c&0x00000800) != 0, XOP) 1074 fs.setIf((c&0x00010000) != 0, FMA4) 1075 } 1076 1077 } 1078 if maxExtendedFunction() >= 0x80000007 { 1079 _, b, _, d := cpuid(0x80000007) 1080 fs.setIf((b&(1<<0)) != 0, MCAOVERFLOW) 1081 fs.setIf((b&(1<<1)) != 0, SUCCOR) 1082 fs.setIf((b&(1<<2)) != 0, HWA) 1083 fs.setIf((d&(1<<9)) != 0, CPBOOST) 1084 } 1085 1086 if maxExtendedFunction() >= 0x80000008 { 1087 _, b, _, _ := cpuid(0x80000008) 1088 fs.setIf((b&(1<<9)) != 0, WBNOINVD) 1089 fs.setIf((b&(1<<8)) != 0, MCOMMIT) 1090 fs.setIf((b&(1<<13)) != 0, INT_WBINVD) 1091 fs.setIf((b&(1<<4)) != 0, RDPRU) 1092 fs.setIf((b&(1<<3)) != 0, INVLPGB) 1093 fs.setIf((b&(1<<1)) != 0, MSRIRC) 1094 fs.setIf((b&(1<<0)) != 0, CLZERO) 1095 } 1096 1097 if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) { 1098 eax, _, _, _ := cpuid(0x8000001b) 1099 fs.setIf((eax>>0)&1 == 1, IBSFFV) 1100 fs.setIf((eax>>1)&1 == 1, IBSFETCHSAM) 1101 fs.setIf((eax>>2)&1 == 1, IBSOPSAM) 1102 fs.setIf((eax>>3)&1 == 1, IBSRDWROPCNT) 1103 fs.setIf((eax>>4)&1 == 1, IBSOPCNT) 1104 fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT) 1105 fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT) 1106 fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK) 1107 } 1108 1109 return fs 1110 } 1111 1112 func valAsString(values ...uint32) []byte { 1113 r := make([]byte, 4*len(values)) 1114 for i, v := range values { 1115 dst := r[i*4:] 1116 dst[0] = byte(v & 0xff) 1117 dst[1] = byte((v >> 8) & 0xff) 1118 dst[2] = byte((v >> 16) & 0xff) 1119 dst[3] = byte((v >> 24) & 0xff) 1120 switch { 1121 case dst[0] == 0: 1122 return r[:i*4] 1123 case dst[1] == 0: 1124 return r[:i*4+1] 1125 case dst[2] == 0: 1126 return r[:i*4+2] 1127 case dst[3] == 0: 1128 return r[:i*4+3] 1129 } 1130 } 1131 return r 1132 }