github.com/klauspost/cpuid/v2@v2.2.7/cpuid.go (about) 1 // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. 2 3 // Package cpuid provides information about the CPU running the current program. 4 // 5 // CPU features are detected on startup, and kept for fast access through the life of the application. 6 // Currently x86 / x64 (AMD64) as well as arm64 is supported. 7 // 8 // You can access the CPU information by accessing the shared CPU variable of the cpuid library. 9 // 10 // Package home: https://github.com/klauspost/cpuid 11 package cpuid 12 13 import ( 14 "flag" 15 "fmt" 16 "math" 17 "math/bits" 18 "os" 19 "runtime" 20 "strings" 21 ) 22 23 // AMD refererence: https://www.amd.com/system/files/TechDocs/25481.pdf 24 // and Processor Programming Reference (PPR) 25 26 // Vendor is a representation of a CPU vendor. 27 type Vendor int 28 29 const ( 30 VendorUnknown Vendor = iota 31 Intel 32 AMD 33 VIA 34 Transmeta 35 NSC 36 KVM // Kernel-based Virtual Machine 37 MSVM // Microsoft Hyper-V or Windows Virtual PC 38 VMware 39 XenHVM 40 Bhyve 41 Hygon 42 SiS 43 RDC 44 45 Ampere 46 ARM 47 Broadcom 48 Cavium 49 DEC 50 Fujitsu 51 Infineon 52 Motorola 53 NVIDIA 54 AMCC 55 Qualcomm 56 Marvell 57 58 lastVendor 59 ) 60 61 //go:generate stringer -type=FeatureID,Vendor 62 63 // FeatureID is the ID of a specific cpu feature. 64 type FeatureID int 65 66 const ( 67 // Keep index -1 as unknown 68 UNKNOWN = -1 69 70 // x86 features 71 ADX FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions) 72 AESNI // Advanced Encryption Standard New Instructions 73 AMD3DNOW // AMD 3DNOW 74 AMD3DNOWEXT // AMD 3DNowExt 75 AMXBF16 // Tile computational operations on BFLOAT16 numbers 76 AMXFP16 // Tile computational operations on FP16 numbers 77 AMXINT8 // Tile computational operations on 8-bit integers 78 AMXTILE // Tile architecture 79 APX_F // Intel APX 80 AVX // AVX functions 81 AVX10 // If set the Intel AVX10 Converged Vector ISA is supported 82 AVX10_128 // If set indicates that AVX10 128-bit vector support is present 83 AVX10_256 // If set indicates that AVX10 256-bit vector support is present 84 AVX10_512 // If set indicates that AVX10 512-bit vector support is present 85 AVX2 // AVX2 functions 86 AVX512BF16 // AVX-512 BFLOAT16 Instructions 87 AVX512BITALG // AVX-512 Bit Algorithms 88 AVX512BW // AVX-512 Byte and Word Instructions 89 AVX512CD // AVX-512 Conflict Detection Instructions 90 AVX512DQ // AVX-512 Doubleword and Quadword Instructions 91 AVX512ER // AVX-512 Exponential and Reciprocal Instructions 92 AVX512F // AVX-512 Foundation 93 AVX512FP16 // AVX-512 FP16 Instructions 94 AVX512IFMA // AVX-512 Integer Fused Multiply-Add Instructions 95 AVX512PF // AVX-512 Prefetch Instructions 96 AVX512VBMI // AVX-512 Vector Bit Manipulation Instructions 97 AVX512VBMI2 // AVX-512 Vector Bit Manipulation Instructions, Version 2 98 AVX512VL // AVX-512 Vector Length Extensions 99 AVX512VNNI // AVX-512 Vector Neural Network Instructions 100 AVX512VP2INTERSECT // AVX-512 Intersect for D/Q 101 AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword 102 AVXIFMA // AVX-IFMA instructions 103 AVXNECONVERT // AVX-NE-CONVERT instructions 104 AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one 105 AVXVNNI // AVX (VEX encoded) VNNI neural network instructions 106 AVXVNNIINT8 // AVX-VNNI-INT8 instructions 107 BHI_CTRL // Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598 108 BMI1 // Bit 
Manipulation Instruction Set 1 109 BMI2 // Bit Manipulation Instruction Set 2 110 CETIBT // Intel CET Indirect Branch Tracking 111 CETSS // Intel CET Shadow Stack 112 CLDEMOTE // Cache Line Demote 113 CLMUL // Carry-less Multiplication 114 CLZERO // CLZERO instruction supported 115 CMOV // i686 CMOV 116 CMPCCXADD // CMPCCXADD instructions 117 CMPSB_SCADBS_SHORT // Fast short CMPSB and SCASB 118 CMPXCHG8 // CMPXCHG8 instruction 119 CPBOOST // Core Performance Boost 120 CPPC // AMD: Collaborative Processor Performance Control 121 CX16 // CMPXCHG16B Instruction 122 EFER_LMSLE_UNS // AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ 123 ENQCMD // Enqueue Command 124 ERMS // Enhanced REP MOVSB/STOSB 125 F16C // Half-precision floating-point conversion 126 FLUSH_L1D // Flush L1D cache 127 FMA3 // Intel FMA 3. Does not imply AVX. 128 FMA4 // Bulldozer FMA4 functions 129 FP128 // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide 130 FP256 // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide 131 FSRM // Fast Short Rep Mov 132 FXSR // FXSAVE, FXRESTOR instructions, CR4 bit 9 133 FXSROPT // FXSAVE/FXRSTOR optimizations 134 GFNI // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage. 135 HLE // Hardware Lock Elision 136 HRESET // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR 137 HTT // Hyperthreading (enabled) 138 HWA // Hardware assert supported. Indicates support for MSRC001_10 139 HYBRID_CPU // This part has CPUs of more than one type. 140 HYPERVISOR // This bit has been reserved by Intel & AMD for use by hypervisors 141 IA32_ARCH_CAP // IA32_ARCH_CAPABILITIES MSR (Intel) 142 IA32_CORE_CAP // IA32_CORE_CAPABILITIES MSR 143 IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB) 144 IBPB_BRTYPE // Indicates that MSR 49h (PRED_CMD) bit 0 (IBPB) flushes all branch type predictions from the CPU branch predictor 145 IBRS // AMD: Indirect Branch Restricted Speculation 146 IBRS_PREFERRED // AMD: IBRS is preferred over software solution 147 IBRS_PROVIDES_SMP // AMD: IBRS provides Same Mode Protection 148 IBS // Instruction Based Sampling (AMD) 149 IBSBRNTRGT // Instruction Based Sampling Feature (AMD) 150 IBSFETCHSAM // Instruction Based Sampling Feature (AMD) 151 IBSFFV // Instruction Based Sampling Feature (AMD) 152 IBSOPCNT // Instruction Based Sampling Feature (AMD) 153 IBSOPCNTEXT // Instruction Based Sampling Feature (AMD) 154 IBSOPSAM // Instruction Based Sampling Feature (AMD) 155 IBSRDWROPCNT // Instruction Based Sampling Feature (AMD) 156 IBSRIPINVALIDCHK // Instruction Based Sampling Feature (AMD) 157 IBS_FETCH_CTLX // AMD: IBS fetch control extended MSR supported 158 IBS_OPDATA4 // AMD: IBS op data 4 MSR supported 159 IBS_OPFUSE // AMD: Indicates support for IbsOpFuse 160 IBS_PREVENTHOST // Disallowing IBS use by the host supported 161 IBS_ZEN4 // AMD: Fetch and Op IBS support IBS extensions added with Zen4 162 IDPRED_CTRL // IPRED_DIS 163 INT_WBINVD // WBINVD/WBNOINVD are interruptible. 164 INVLPGB // NVLPGB and TLBSYNC instruction supported 165 KEYLOCKER // Key locker 166 KEYLOCKERW // Key locker wide 167 LAHF // LAHF/SAHF in long mode 168 LAM // If set, CPU supports Linear Address Masking 169 LBRVIRT // LBR virtualization 170 LZCNT // LZCNT instruction 171 MCAOVERFLOW // MCA overflow recovery support. 
172 MCDT_NO // Processor do not exhibit MXCSR Configuration Dependent Timing behavior and do not need to mitigate it. 173 MCOMMIT // MCOMMIT instruction supported 174 MD_CLEAR // VERW clears CPU buffers 175 MMX // standard MMX 176 MMXEXT // SSE integer functions or AMD MMX ext 177 MOVBE // MOVBE instruction (big-endian) 178 MOVDIR64B // Move 64 Bytes as Direct Store 179 MOVDIRI // Move Doubleword as Direct Store 180 MOVSB_ZL // Fast Zero-Length MOVSB 181 MOVU // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD 182 MPX // Intel MPX (Memory Protection Extensions) 183 MSRIRC // Instruction Retired Counter MSR available 184 MSRLIST // Read/Write List of Model Specific Registers 185 MSR_PAGEFLUSH // Page Flush MSR available 186 NRIPS // Indicates support for NRIP save on VMEXIT 187 NX // NX (No-Execute) bit 188 OSXSAVE // XSAVE enabled by OS 189 PCONFIG // PCONFIG for Intel Multi-Key Total Memory Encryption 190 POPCNT // POPCNT instruction 191 PPIN // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled 192 PREFETCHI // PREFETCHIT0/1 instructions 193 PSFD // Predictive Store Forward Disable 194 RDPRU // RDPRU instruction supported 195 RDRAND // RDRAND instruction is available 196 RDSEED // RDSEED instruction is available 197 RDTSCP // RDTSCP Instruction 198 RRSBA_CTRL // Restricted RSB Alternate 199 RTM // Restricted Transactional Memory 200 RTM_ALWAYS_ABORT // Indicates that the loaded microcode is forcing RTM abort. 201 SBPB // Indicates support for the Selective Branch Predictor Barrier 202 SERIALIZE // Serialize Instruction Execution 203 SEV // AMD Secure Encrypted Virtualization supported 204 SEV_64BIT // AMD SEV guest execution only allowed from a 64-bit host 205 SEV_ALTERNATIVE // AMD SEV Alternate Injection supported 206 SEV_DEBUGSWAP // Full debug state swap supported for SEV-ES guests 207 SEV_ES // AMD SEV Encrypted State supported 208 SEV_RESTRICTED // AMD SEV Restricted Injection supported 209 SEV_SNP // AMD SEV Secure Nested Paging supported 210 SGX // Software Guard Extensions 211 SGXLC // Software Guard Extensions Launch Control 212 SHA // Intel SHA Extensions 213 SME // AMD Secure Memory Encryption supported 214 SME_COHERENT // AMD Hardware cache coherency across encryption domains enforced 215 SPEC_CTRL_SSBD // Speculative Store Bypass Disable 216 SRBDS_CTRL // SRBDS mitigation MSR available 217 SRSO_MSR_FIX // Indicates that software may use MSR BP_CFG[BpSpecReduce] to mitigate SRSO. 218 SRSO_NO // Indicates the CPU is not subject to the SRSO vulnerability 219 SRSO_USER_KERNEL_NO // Indicates the CPU is not subject to the SRSO vulnerability across user/kernel boundaries 220 SSE // SSE functions 221 SSE2 // P4 SSE functions 222 SSE3 // Prescott SSE3 functions 223 SSE4 // Penryn SSE4.1 functions 224 SSE42 // Nehalem SSE4.2 functions 225 SSE4A // AMD Barcelona microarchitecture SSE4a instructions 226 SSSE3 // Conroe SSSE3 functions 227 STIBP // Single Thread Indirect Branch Predictors 228 STIBP_ALWAYSON // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On 229 STOSB_SHORT // Fast short STOSB 230 SUCCOR // Software uncorrectable error containment and recovery capability. 231 SVM // AMD Secure Virtual Machine 232 SVMDA // Indicates support for the SVM decode assists. 
233 SVMFBASID // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control 234 SVML // AMD SVM lock. Indicates support for SVM-Lock. 235 SVMNP // AMD SVM nested paging 236 SVMPF // SVM pause intercept filter. Indicates support for the pause intercept filter 237 SVMPFT // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold 238 SYSCALL // System-Call Extension (SCE): SYSCALL and SYSRET instructions. 239 SYSEE // SYSENTER and SYSEXIT instructions 240 TBM // AMD Trailing Bit Manipulation 241 TDX_GUEST // Intel Trust Domain Extensions Guest 242 TLB_FLUSH_NESTED // AMD: Flushing includes all the nested translations for guest translations 243 TME // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE. 244 TOPEXT // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX. 245 TSCRATEMSR // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104 246 TSXLDTRK // Intel TSX Suspend Load Address Tracking 247 VAES // Vector AES. AVX(512) versions requires additional checks. 248 VMCBCLEAN // VMCB clean bits. Indicates support for VMCB clean bits. 249 VMPL // AMD VM Permission Levels supported 250 VMSA_REGPROT // AMD VMSA Register Protection supported 251 VMX // Virtual Machine Extensions 252 VPCLMULQDQ // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions. 253 VTE // AMD Virtual Transparent Encryption supported 254 WAITPKG // TPAUSE, UMONITOR, UMWAIT 255 WBNOINVD // Write Back and Do Not Invalidate Cache 256 WRMSRNS // Non-Serializing Write to Model Specific Register 257 X87 // FPU 258 XGETBV1 // Supports XGETBV with ECX = 1 259 XOP // Bulldozer XOP functions 260 XSAVE // XSAVE, XRESTOR, XSETBV, XGETBV 261 XSAVEC // Supports XSAVEC and the compacted form of XRSTOR. 262 XSAVEOPT // XSAVEOPT available 263 XSAVES // Supports XSAVES/XRSTORS and IA32_XSS 264 265 // ARM features: 266 AESARM // AES instructions 267 ARMCPUID // Some CPU ID registers readable at user-level 268 ASIMD // Advanced SIMD 269 ASIMDDP // SIMD Dot Product 270 ASIMDHP // Advanced SIMD half-precision floating point 271 ASIMDRDM // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH) 272 ATOMICS // Large System Extensions (LSE) 273 CRC32 // CRC32/CRC32C instructions 274 DCPOP // Data cache clean to Point of Persistence (DC CVAP) 275 EVTSTRM // Generic timer 276 FCMA // Floatin point complex number addition and multiplication 277 FP // Single-precision and double-precision floating point 278 FPHP // Half-precision floating point 279 GPA // Generic Pointer Authentication 280 JSCVT // Javascript-style double->int convert (FJCVTZS) 281 LRCPC // Weaker release consistency (LDAPR, etc) 282 PMULL // Polynomial Multiply instructions (PMULL/PMULL2) 283 SHA1 // SHA-1 instructions (SHA1C, etc) 284 SHA2 // SHA-2 instructions (SHA256H, etc) 285 SHA3 // SHA-3 instructions (EOR3, RAXI, XAR, BCAX) 286 SHA512 // SHA512 instructions 287 SM3 // SM3 instructions 288 SM4 // SM4 instructions 289 SVE // Scalable Vector Extension 290 // Keep it last. It automatically defines the size of []flagSet 291 lastID 292 293 firstID FeatureID = UNKNOWN + 1 294 ) 295 296 // CPUInfo contains information about the detected system CPU. 
297 type CPUInfo struct { 298 BrandName string // Brand name reported by the CPU 299 VendorID Vendor // Comparable CPU vendor ID 300 VendorString string // Raw vendor string. 301 featureSet flagSet // Features of the CPU 302 PhysicalCores int // Number of physical processor cores in your CPU. Will be 0 if undetectable. 303 ThreadsPerCore int // Number of threads per physical core. Will be 1 if undetectable. 304 LogicalCores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable. 305 Family int // CPU family number 306 Model int // CPU model number 307 Stepping int // CPU stepping info 308 CacheLine int // Cache line size in bytes. Will be 0 if undetectable. 309 Hz int64 // Clock speed, if known, 0 otherwise. Will attempt to contain base clock speed. 310 BoostFreq int64 // Max clock speed, if known, 0 otherwise 311 Cache struct { 312 L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected 313 L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected 314 L2 int // L2 Cache (per core or shared). Will be -1 if undetected 315 L3 int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected 316 } 317 SGX SGXSupport 318 AMDMemEncryption AMDMemEncryptionSupport 319 AVX10Level uint8 320 maxFunc uint32 321 maxExFunc uint32 322 } 323 324 var cpuid func(op uint32) (eax, ebx, ecx, edx uint32) 325 var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32) 326 var xgetbv func(index uint32) (eax, edx uint32) 327 var rdtscpAsm func() (eax, ebx, ecx, edx uint32) 328 var darwinHasAVX512 = func() bool { return false } 329 330 // CPU contains information about the CPU as detected on startup, 331 // or when Detect last was called. 332 // 333 // Use this as the primary entry point to you data. 334 var CPU CPUInfo 335 336 func init() { 337 initCPU() 338 Detect() 339 } 340 341 // Detect will re-detect current CPU info. 342 // This will replace the content of the exported CPU variable. 343 // 344 // Unless you expect the CPU to change while you are running your program 345 // you should not need to call this function. 346 // If you call this, you must ensure that no other goroutine is accessing the 347 // exported CPU variable. 348 func Detect() { 349 // Set defaults 350 CPU.ThreadsPerCore = 1 351 CPU.Cache.L1I = -1 352 CPU.Cache.L1D = -1 353 CPU.Cache.L2 = -1 354 CPU.Cache.L3 = -1 355 safe := true 356 if detectArmFlag != nil { 357 safe = !*detectArmFlag 358 } 359 addInfo(&CPU, safe) 360 if displayFeats != nil && *displayFeats { 361 fmt.Println("cpu features:", strings.Join(CPU.FeatureSet(), ",")) 362 // Exit with non-zero so tests will print value. 363 os.Exit(1) 364 } 365 if disableFlag != nil { 366 s := strings.Split(*disableFlag, ",") 367 for _, feat := range s { 368 feat := ParseFeature(strings.TrimSpace(feat)) 369 if feat != UNKNOWN { 370 CPU.featureSet.unset(feat) 371 } 372 } 373 } 374 } 375 376 // DetectARM will detect ARM64 features. 377 // This is NOT done automatically since it can potentially crash 378 // if the OS does not handle the command. 379 // If in the future this can be done safely this function may not 380 // do anything. 381 func DetectARM() { 382 addInfo(&CPU, false) 383 } 384 385 var detectArmFlag *bool 386 var displayFeats *bool 387 var disableFlag *string 388 389 // Flags will enable flags. 390 // This must be called *before* flag.Parse AND 391 // Detect must be called after the flags have been parsed. 
392 // Note that this means that any detection used in init() functions
393 // will not contain these flags.
394 func Flags() {
395 disableFlag = flag.String("cpu.disable", "", "disable cpu features; comma separated list")
396 displayFeats = flag.Bool("cpu.features", false, "lists cpu features and exits")
397 detectArmFlag = flag.Bool("cpu.arm", false, "allow ARM features to be detected; can potentially crash")
398 }
399
400 // Supports returns whether the CPU supports all of the requested features.
401 func (c CPUInfo) Supports(ids ...FeatureID) bool {
402 for _, id := range ids {
403 if !c.featureSet.inSet(id) {
404 return false
405 }
406 }
407 return true
408 }
409
410 // Has allows for checking a single feature.
411 // Should be inlined by the compiler.
412 func (c *CPUInfo) Has(id FeatureID) bool {
413 return c.featureSet.inSet(id)
414 }
415
416 // AnyOf returns whether the CPU supports one or more of the requested features.
417 func (c CPUInfo) AnyOf(ids ...FeatureID) bool {
418 for _, id := range ids {
419 if c.featureSet.inSet(id) {
420 return true
421 }
422 }
423 return false
424 }
425
426 // Features contains several features combined for a fast check using
427 // CPUInfo.HasAll.
428 type Features *flagSet
429
430 // CombineFeatures allows combining several features for a near constant-time lookup.
431 func CombineFeatures(ids ...FeatureID) Features {
432 var v flagSet
433 for _, id := range ids {
434 v.set(id)
435 }
436 return &v
437 }
438 // HasAll returns whether all of the combined features are present.
439 func (c *CPUInfo) HasAll(f Features) bool {
440 return c.featureSet.hasSetP(f)
441 }
442
443 // https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
444 var oneOfLevel = CombineFeatures(SYSEE, SYSCALL)
445 var level1Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2)
446 var level2Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3)
447 var level3Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE)
448 var level4Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL)
449
450 // X64Level returns the microarchitecture level detected on the CPU.
451 // If features are lacking, or the CPU is not in x64 mode, 0 is returned.
452 // See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
453 func (c CPUInfo) X64Level() int {
454 if !c.featureSet.hasOneOf(oneOfLevel) {
455 return 0
456 }
457 if c.featureSet.hasSetP(level4Features) {
458 return 4
459 }
460 if c.featureSet.hasSetP(level3Features) {
461 return 3
462 }
463 if c.featureSet.hasSetP(level2Features) {
464 return 2
465 }
466 if c.featureSet.hasSetP(level1Features) {
467 return 1
468 }
469 return 0
470 }
471
472 // Disable will disable one or several features.
473 func (c *CPUInfo) Disable(ids ...FeatureID) bool {
474 for _, id := range ids {
475 c.featureSet.unset(id)
476 }
477 return true
478 }
479
480 // Enable will enable one or several features even if they were not detected.
481 // This is of course not recommended for obvious reasons.
482 func (c *CPUInfo) Enable(ids ...FeatureID) bool {
483 for _, id := range ids {
484 c.featureSet.set(id)
485 }
486 return true
487 }
488
489 // IsVendor returns true if the detected vendor matches the given vendor.
490 func (c CPUInfo) IsVendor(v Vendor) bool {
491 return c.VendorID == v
492 }
493
494 // FeatureSet returns all available features as strings.
495 func (c CPUInfo) FeatureSet() []string {
496 s := make([]string, 0, c.featureSet.nEnabled())
497 s = append(s, c.featureSet.Strings()...)
498 return s
499 }
500
501 // RTCounter returns the 64-bit time-stamp counter.
502 // Uses the RDTSCP instruction. The value 0 is returned
503 // if the CPU does not support the instruction.
504 func (c CPUInfo) RTCounter() uint64 {
505 if !c.Supports(RDTSCP) {
506 return 0
507 }
508 a, _, _, d := rdtscpAsm()
509 return uint64(a) | (uint64(d) << 32)
510 }
511
512 // Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP result.
513 // This value is OS dependent, but on Linux contains information
514 // about the current cpu/core the code is running on.
515 // If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
516 func (c CPUInfo) Ia32TscAux() uint32 {
517 if !c.Supports(RDTSCP) {
518 return 0
519 }
520 _, _, ecx, _ := rdtscpAsm()
521 return ecx
522 }
523
524 // LogicalCPU will return the logical CPU the code is currently executing on.
525 // This is likely to change when the OS re-schedules the running thread
526 // to another CPU.
527 // If the current core cannot be detected, -1 will be returned.
528 func (c CPUInfo) LogicalCPU() int {
529 if c.maxFunc < 1 {
530 return -1
531 }
532 _, ebx, _, _ := cpuid(1)
533 return int(ebx >> 24)
534 }
535
536 // frequencies tries to compute the clock speed of the CPU. If leaf 0x15 is
537 // supported, use it, otherwise parse the brand string. Yes, really.
538 func (c *CPUInfo) frequencies() {
539 c.Hz, c.BoostFreq = 0, 0
540 mfi := maxFunctionID()
541 if mfi >= 0x15 {
542 eax, ebx, ecx, _ := cpuid(0x15)
543 if eax != 0 && ebx != 0 && ecx != 0 {
544 c.Hz = (int64(ecx) * int64(ebx)) / int64(eax)
545 }
546 }
547 if mfi >= 0x16 {
548 a, b, _, _ := cpuid(0x16)
549 // Base...
550 if a&0xffff > 0 {
551 c.Hz = int64(a&0xffff) * 1_000_000
552 }
553 // Boost...
554 if b&0xffff > 0 {
555 c.BoostFreq = int64(b&0xffff) * 1_000_000
556 }
557 }
558 if c.Hz > 0 {
559 return
560 }
561
562 // The following determines the official rated speed of a CPU from its brand
563 // string. This insanity is *actually the official documented way to do
564 // this according to Intel*, prior to leaf 0x15 existing. The official
565 // documentation only shows this working for exactly `x.xx` or `xxxx`
566 // cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other
567 // sizes.
568 model := c.BrandName
569 hz := strings.LastIndex(model, "Hz")
570 if hz < 3 {
571 return
572 }
573 var multiplier int64
574 switch model[hz-1] {
575 case 'M':
576 multiplier = 1000 * 1000
577 case 'G':
578 multiplier = 1000 * 1000 * 1000
579 case 'T':
580 multiplier = 1000 * 1000 * 1000 * 1000
581 }
582 if multiplier == 0 {
583 return
584 }
585 freq := int64(0)
586 divisor := int64(0)
587 decimalShift := int64(1)
588 var i int
589 for i = hz - 2; i >= 0 && model[i] != ' '; i-- {
590 if model[i] >= '0' && model[i] <= '9' {
591 freq += int64(model[i]-'0') * decimalShift
592 decimalShift *= 10
593 } else if model[i] == '.'
{ 594 if divisor != 0 { 595 return 596 } 597 divisor = decimalShift 598 } else { 599 return 600 } 601 } 602 // we didn't find a space 603 if i < 0 { 604 return 605 } 606 if divisor != 0 { 607 c.Hz = (freq * multiplier) / divisor 608 return 609 } 610 c.Hz = freq * multiplier 611 } 612 613 // VM Will return true if the cpu id indicates we are in 614 // a virtual machine. 615 func (c CPUInfo) VM() bool { 616 return CPU.featureSet.inSet(HYPERVISOR) 617 } 618 619 // flags contains detected cpu features and characteristics 620 type flags uint64 621 622 // log2(bits_in_uint64) 623 const flagBitsLog2 = 6 624 const flagBits = 1 << flagBitsLog2 625 const flagMask = flagBits - 1 626 627 // flagSet contains detected cpu features and characteristics in an array of flags 628 type flagSet [(lastID + flagMask) / flagBits]flags 629 630 func (s *flagSet) inSet(feat FeatureID) bool { 631 return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0 632 } 633 634 func (s *flagSet) set(feat FeatureID) { 635 s[feat>>flagBitsLog2] |= 1 << (feat & flagMask) 636 } 637 638 // setIf will set a feature if boolean is true. 639 func (s *flagSet) setIf(cond bool, features ...FeatureID) { 640 if cond { 641 for _, offset := range features { 642 s[offset>>flagBitsLog2] |= 1 << (offset & flagMask) 643 } 644 } 645 } 646 647 func (s *flagSet) unset(offset FeatureID) { 648 bit := flags(1 << (offset & flagMask)) 649 s[offset>>flagBitsLog2] = s[offset>>flagBitsLog2] & ^bit 650 } 651 652 // or with another flagset. 653 func (s *flagSet) or(other flagSet) { 654 for i, v := range other[:] { 655 s[i] |= v 656 } 657 } 658 659 // hasSet returns whether all features are present. 660 func (s *flagSet) hasSet(other flagSet) bool { 661 for i, v := range other[:] { 662 if s[i]&v != v { 663 return false 664 } 665 } 666 return true 667 } 668 669 // hasSet returns whether all features are present. 670 func (s *flagSet) hasSetP(other *flagSet) bool { 671 for i, v := range other[:] { 672 if s[i]&v != v { 673 return false 674 } 675 } 676 return true 677 } 678 679 // hasOneOf returns whether one or more features are present. 680 func (s *flagSet) hasOneOf(other *flagSet) bool { 681 for i, v := range other[:] { 682 if s[i]&v != 0 { 683 return true 684 } 685 } 686 return false 687 } 688 689 // nEnabled will return the number of enabled flags. 690 func (s *flagSet) nEnabled() (n int) { 691 for _, v := range s[:] { 692 n += bits.OnesCount64(uint64(v)) 693 } 694 return n 695 } 696 697 func flagSetWith(feat ...FeatureID) flagSet { 698 var res flagSet 699 for _, f := range feat { 700 res.set(f) 701 } 702 return res 703 } 704 705 // ParseFeature will parse the string and return the ID of the matching feature. 706 // Will return UNKNOWN if not found. 707 func ParseFeature(s string) FeatureID { 708 s = strings.ToUpper(s) 709 for i := firstID; i < lastID; i++ { 710 if i.String() == s { 711 return i 712 } 713 } 714 return UNKNOWN 715 } 716 717 // Strings returns an array of the detected features for FlagsSet. 
718 func (s flagSet) Strings() []string { 719 if len(s) == 0 { 720 return []string{""} 721 } 722 r := make([]string, 0) 723 for i := firstID; i < lastID; i++ { 724 if s.inSet(i) { 725 r = append(r, i.String()) 726 } 727 } 728 return r 729 } 730 731 func maxExtendedFunction() uint32 { 732 eax, _, _, _ := cpuid(0x80000000) 733 return eax 734 } 735 736 func maxFunctionID() uint32 { 737 a, _, _, _ := cpuid(0) 738 return a 739 } 740 741 func brandName() string { 742 if maxExtendedFunction() >= 0x80000004 { 743 v := make([]uint32, 0, 48) 744 for i := uint32(0); i < 3; i++ { 745 a, b, c, d := cpuid(0x80000002 + i) 746 v = append(v, a, b, c, d) 747 } 748 return strings.Trim(string(valAsString(v...)), " ") 749 } 750 return "unknown" 751 } 752 753 func threadsPerCore() int { 754 mfi := maxFunctionID() 755 vend, _ := vendorID() 756 757 if mfi < 0x4 || (vend != Intel && vend != AMD) { 758 return 1 759 } 760 761 if mfi < 0xb { 762 if vend != Intel { 763 return 1 764 } 765 _, b, _, d := cpuid(1) 766 if (d & (1 << 28)) != 0 { 767 // v will contain logical core count 768 v := (b >> 16) & 255 769 if v > 1 { 770 a4, _, _, _ := cpuid(4) 771 // physical cores 772 v2 := (a4 >> 26) + 1 773 if v2 > 0 { 774 return int(v) / int(v2) 775 } 776 } 777 } 778 return 1 779 } 780 _, b, _, _ := cpuidex(0xb, 0) 781 if b&0xffff == 0 { 782 if vend == AMD { 783 // Workaround for AMD returning 0, assume 2 if >= Zen 2 784 // It will be more correct than not. 785 fam, _, _ := familyModel() 786 _, _, _, d := cpuid(1) 787 if (d&(1<<28)) != 0 && fam >= 23 { 788 return 2 789 } 790 } 791 return 1 792 } 793 return int(b & 0xffff) 794 } 795 796 func logicalCores() int { 797 mfi := maxFunctionID() 798 v, _ := vendorID() 799 switch v { 800 case Intel: 801 // Use this on old Intel processors 802 if mfi < 0xb { 803 if mfi < 1 { 804 return 0 805 } 806 // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID) 807 // that can be assigned to logical processors in a physical package. 808 // The value may not be the same as the number of logical processors that are present in the hardware of a physical package. 809 _, ebx, _, _ := cpuid(1) 810 logical := (ebx >> 16) & 0xff 811 return int(logical) 812 } 813 _, b, _, _ := cpuidex(0xb, 1) 814 return int(b & 0xffff) 815 case AMD, Hygon: 816 _, b, _, _ := cpuid(1) 817 return int((b >> 16) & 0xff) 818 default: 819 return 0 820 } 821 } 822 823 func familyModel() (family, model, stepping int) { 824 if maxFunctionID() < 0x1 { 825 return 0, 0, 0 826 } 827 eax, _, _, _ := cpuid(1) 828 // If BaseFamily[3:0] is less than Fh then ExtendedFamily[7:0] is reserved and Family is equal to BaseFamily[3:0]. 829 family = int((eax >> 8) & 0xf) 830 extFam := family == 0x6 // Intel is 0x6, needs extended model. 831 if family == 0xf { 832 // Add ExtFamily 833 family += int((eax >> 20) & 0xff) 834 extFam = true 835 } 836 // If BaseFamily[3:0] is less than 0Fh then ExtendedModel[3:0] is reserved and Model is equal to BaseModel[3:0]. 
837 model = int((eax >> 4) & 0xf)
838 if extFam {
839 // Add ExtModel
840 model += int((eax >> 12) & 0xf0)
841 }
842 stepping = int(eax & 0xf)
843 return family, model, stepping
844 }
845
846 func physicalCores() int {
847 v, _ := vendorID()
848 switch v {
849 case Intel:
850 return logicalCores() / threadsPerCore()
851 case AMD, Hygon:
852 lc := logicalCores()
853 tpc := threadsPerCore()
854 if lc > 0 && tpc > 0 {
855 return lc / tpc
856 }
857
858 // The following is inaccurate on AMD EPYC 7742 64-Core Processor
859 if maxExtendedFunction() >= 0x80000008 {
860 _, _, c, _ := cpuid(0x80000008)
861 if c&0xff > 0 {
862 return int(c&0xff) + 1
863 }
864 }
865 }
866 return 0
867 }
868
869 // Excerpt from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
870 var vendorMapping = map[string]Vendor{
871 "AMDisbetter!": AMD,
872 "AuthenticAMD": AMD,
873 "CentaurHauls": VIA,
874 "GenuineIntel": Intel,
875 "TransmetaCPU": Transmeta,
876 "GenuineTMx86": Transmeta,
877 "Geode by NSC": NSC,
878 "VIA VIA VIA ": VIA,
879 "KVMKVMKVMKVM": KVM,
880 "Microsoft Hv": MSVM,
881 "VMwareVMware": VMware,
882 "XenVMMXenVMM": XenHVM,
883 "bhyve bhyve ": Bhyve,
884 "HygonGenuine": Hygon,
885 "Vortex86 SoC": SiS,
886 "SiS SiS SiS ": SiS,
887 "RiseRiseRise": SiS,
888 "Genuine RDC": RDC,
889 }
890
891 func vendorID() (Vendor, string) {
892 _, b, c, d := cpuid(0)
893 v := string(valAsString(b, d, c))
894 vend, ok := vendorMapping[v]
895 if !ok {
896 return VendorUnknown, v
897 }
898 return vend, v
899 }
900
901 func cacheLine() int {
902 if maxFunctionID() < 0x1 {
903 return 0
904 }
905
906 _, ebx, _, _ := cpuid(1)
907 cache := (ebx & 0xff00) >> 5 // CLFLUSH line size: EBX[15:8] * 8 bytes
908 if cache == 0 && maxExtendedFunction() >= 0x80000006 {
909 _, _, ecx, _ := cpuid(0x80000006)
910 cache = ecx & 0xff // cacheline size
911 }
912 // TODO: Read from Cache and TLB Information
913 return int(cache)
914 }
915
916 func (c *CPUInfo) cacheSize() {
917 c.Cache.L1D = -1
918 c.Cache.L1I = -1
919 c.Cache.L2 = -1
920 c.Cache.L3 = -1
921 vendor, _ := vendorID()
922 switch vendor {
923 case Intel:
924 if maxFunctionID() < 4 {
925 return
926 }
927 c.Cache.L1I, c.Cache.L1D, c.Cache.L2, c.Cache.L3 = 0, 0, 0, 0
928 for i := uint32(0); ; i++ {
929 eax, ebx, ecx, _ := cpuidex(4, i)
930 cacheType := eax & 15
931 if cacheType == 0 {
932 break
933 }
934 cacheLevel := (eax >> 5) & 7
935 coherency := int(ebx&0xfff) + 1
936 partitions := int((ebx>>12)&0x3ff) + 1
937 associativity := int((ebx>>22)&0x3ff) + 1
938 sets := int(ecx) + 1
939 size := associativity * partitions * coherency * sets
940 switch cacheLevel {
941 case 1:
942 if cacheType == 1 {
943 // 1 = Data Cache
944 c.Cache.L1D = size
945 } else if cacheType == 2 {
946 // 2 = Instruction Cache
947 c.Cache.L1I = size
948 } else { // Unified (or unknown) cache type: record it where still unset.
949 if c.Cache.L1D < 0 {
950 c.Cache.L1D = size
951 }
952 if c.Cache.L1I < 0 {
953 c.Cache.L1I = size
954 }
955 }
956 case 2:
957 c.Cache.L2 = size
958 case 3:
959 c.Cache.L3 = size
960 }
961 }
962 case AMD, Hygon:
963 // Untested.
964 if maxExtendedFunction() < 0x80000005 { 965 return 966 } 967 _, _, ecx, edx := cpuid(0x80000005) 968 c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024) 969 c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024) 970 971 if maxExtendedFunction() < 0x80000006 { 972 return 973 } 974 _, _, ecx, _ = cpuid(0x80000006) 975 c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024) 976 977 // CPUID Fn8000_001D_EAX_x[N:0] Cache Properties 978 if maxExtendedFunction() < 0x8000001D || !c.Has(TOPEXT) { 979 return 980 } 981 982 // Xen Hypervisor is buggy and returns the same entry no matter ECX value. 983 // Hack: When we encounter the same entry 100 times we break. 984 nSame := 0 985 var last uint32 986 for i := uint32(0); i < math.MaxUint32; i++ { 987 eax, ebx, ecx, _ := cpuidex(0x8000001D, i) 988 989 level := (eax >> 5) & 7 990 cacheNumSets := ecx + 1 991 cacheLineSize := 1 + (ebx & 2047) 992 cachePhysPartitions := 1 + ((ebx >> 12) & 511) 993 cacheNumWays := 1 + ((ebx >> 22) & 511) 994 995 typ := eax & 15 996 size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays) 997 if typ == 0 { 998 return 999 } 1000 1001 // Check for the same value repeated. 1002 comb := eax ^ ebx ^ ecx 1003 if comb == last { 1004 nSame++ 1005 if nSame == 100 { 1006 return 1007 } 1008 } 1009 last = comb 1010 1011 switch level { 1012 case 1: 1013 switch typ { 1014 case 1: 1015 // Data cache 1016 c.Cache.L1D = size 1017 case 2: 1018 // Inst cache 1019 c.Cache.L1I = size 1020 default: 1021 if c.Cache.L1D < 0 { 1022 c.Cache.L1I = size 1023 } 1024 if c.Cache.L1I < 0 { 1025 c.Cache.L1I = size 1026 } 1027 } 1028 case 2: 1029 c.Cache.L2 = size 1030 case 3: 1031 c.Cache.L3 = size 1032 } 1033 } 1034 } 1035 } 1036 1037 type SGXEPCSection struct { 1038 BaseAddress uint64 1039 EPCSize uint64 1040 } 1041 1042 type SGXSupport struct { 1043 Available bool 1044 LaunchControl bool 1045 SGX1Supported bool 1046 SGX2Supported bool 1047 MaxEnclaveSizeNot64 int64 1048 MaxEnclaveSize64 int64 1049 EPCSections []SGXEPCSection 1050 } 1051 1052 func hasSGX(available, lc bool) (rval SGXSupport) { 1053 rval.Available = available 1054 1055 if !available { 1056 return 1057 } 1058 1059 rval.LaunchControl = lc 1060 1061 a, _, _, d := cpuidex(0x12, 0) 1062 rval.SGX1Supported = a&0x01 != 0 1063 rval.SGX2Supported = a&0x02 != 0 1064 rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF) // pow 2 1065 rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2 1066 rval.EPCSections = make([]SGXEPCSection, 0) 1067 1068 for subleaf := uint32(2); subleaf < 2+8; subleaf++ { 1069 eax, ebx, ecx, edx := cpuidex(0x12, subleaf) 1070 leafType := eax & 0xf 1071 1072 if leafType == 0 { 1073 // Invalid subleaf, stop iterating 1074 break 1075 } else if leafType == 1 { 1076 // EPC Section subleaf 1077 baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32) 1078 size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32) 1079 1080 section := SGXEPCSection{BaseAddress: baseAddress, EPCSize: size} 1081 rval.EPCSections = append(rval.EPCSections, section) 1082 } 1083 } 1084 1085 return 1086 } 1087 1088 type AMDMemEncryptionSupport struct { 1089 Available bool 1090 CBitPossition uint32 1091 NumVMPL uint32 1092 PhysAddrReduction uint32 1093 NumEntryptedGuests uint32 1094 MinSevNoEsAsid uint32 1095 } 1096 1097 func hasAMDMemEncryption(available bool) (rval AMDMemEncryptionSupport) { 1098 rval.Available = available 1099 if !available { 1100 return 1101 } 1102 1103 _, b, c, d := cpuidex(0x8000001f, 0) 1104 1105 rval.CBitPossition = b & 0x3f 1106 rval.PhysAddrReduction = (b >> 6) & 
0x3F 1107 rval.NumVMPL = (b >> 12) & 0xf 1108 rval.NumEntryptedGuests = c 1109 rval.MinSevNoEsAsid = d 1110 1111 return 1112 } 1113 1114 func support() flagSet { 1115 var fs flagSet 1116 mfi := maxFunctionID() 1117 vend, _ := vendorID() 1118 if mfi < 0x1 { 1119 return fs 1120 } 1121 family, model, _ := familyModel() 1122 1123 _, _, c, d := cpuid(1) 1124 fs.setIf((d&(1<<0)) != 0, X87) 1125 fs.setIf((d&(1<<8)) != 0, CMPXCHG8) 1126 fs.setIf((d&(1<<11)) != 0, SYSEE) 1127 fs.setIf((d&(1<<15)) != 0, CMOV) 1128 fs.setIf((d&(1<<23)) != 0, MMX) 1129 fs.setIf((d&(1<<24)) != 0, FXSR) 1130 fs.setIf((d&(1<<25)) != 0, FXSROPT) 1131 fs.setIf((d&(1<<25)) != 0, SSE) 1132 fs.setIf((d&(1<<26)) != 0, SSE2) 1133 fs.setIf((c&1) != 0, SSE3) 1134 fs.setIf((c&(1<<5)) != 0, VMX) 1135 fs.setIf((c&(1<<9)) != 0, SSSE3) 1136 fs.setIf((c&(1<<19)) != 0, SSE4) 1137 fs.setIf((c&(1<<20)) != 0, SSE42) 1138 fs.setIf((c&(1<<25)) != 0, AESNI) 1139 fs.setIf((c&(1<<1)) != 0, CLMUL) 1140 fs.setIf(c&(1<<22) != 0, MOVBE) 1141 fs.setIf(c&(1<<23) != 0, POPCNT) 1142 fs.setIf(c&(1<<30) != 0, RDRAND) 1143 1144 // This bit has been reserved by Intel & AMD for use by hypervisors, 1145 // and indicates the presence of a hypervisor. 1146 fs.setIf(c&(1<<31) != 0, HYPERVISOR) 1147 fs.setIf(c&(1<<29) != 0, F16C) 1148 fs.setIf(c&(1<<13) != 0, CX16) 1149 1150 if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 { 1151 fs.setIf(threadsPerCore() > 1, HTT) 1152 } 1153 if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 { 1154 fs.setIf(threadsPerCore() > 1, HTT) 1155 } 1156 fs.setIf(c&1<<26 != 0, XSAVE) 1157 fs.setIf(c&1<<27 != 0, OSXSAVE) 1158 // Check XGETBV/XSAVE (26), OXSAVE (27) and AVX (28) bits 1159 const avxCheck = 1<<26 | 1<<27 | 1<<28 1160 if c&avxCheck == avxCheck { 1161 // Check for OS support 1162 eax, _ := xgetbv(0) 1163 if (eax & 0x6) == 0x6 { 1164 fs.set(AVX) 1165 switch vend { 1166 case Intel: 1167 // Older than Haswell. 1168 fs.setIf(family == 6 && model < 60, AVXSLOW) 1169 case AMD: 1170 // Older than Zen 2 1171 fs.setIf(family < 23 || (family == 23 && model < 49), AVXSLOW) 1172 } 1173 } 1174 } 1175 // FMA3 can be used with SSE registers, so no OS support is strictly needed. 1176 // fma3 and OSXSAVE needed. 1177 const fma3Check = 1<<12 | 1<<27 1178 fs.setIf(c&fma3Check == fma3Check, FMA3) 1179 1180 // Check AVX2, AVX2 requires OS support, but BMI1/2 don't. 
1181 if mfi >= 7 { 1182 _, ebx, ecx, edx := cpuidex(7, 0) 1183 if fs.inSet(AVX) && (ebx&0x00000020) != 0 { 1184 fs.set(AVX2) 1185 } 1186 // CPUID.(EAX=7, ECX=0).EBX 1187 if (ebx & 0x00000008) != 0 { 1188 fs.set(BMI1) 1189 fs.setIf((ebx&0x00000100) != 0, BMI2) 1190 } 1191 fs.setIf(ebx&(1<<2) != 0, SGX) 1192 fs.setIf(ebx&(1<<4) != 0, HLE) 1193 fs.setIf(ebx&(1<<9) != 0, ERMS) 1194 fs.setIf(ebx&(1<<11) != 0, RTM) 1195 fs.setIf(ebx&(1<<14) != 0, MPX) 1196 fs.setIf(ebx&(1<<18) != 0, RDSEED) 1197 fs.setIf(ebx&(1<<19) != 0, ADX) 1198 fs.setIf(ebx&(1<<29) != 0, SHA) 1199 1200 // CPUID.(EAX=7, ECX=0).ECX 1201 fs.setIf(ecx&(1<<5) != 0, WAITPKG) 1202 fs.setIf(ecx&(1<<7) != 0, CETSS) 1203 fs.setIf(ecx&(1<<8) != 0, GFNI) 1204 fs.setIf(ecx&(1<<9) != 0, VAES) 1205 fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ) 1206 fs.setIf(ecx&(1<<13) != 0, TME) 1207 fs.setIf(ecx&(1<<25) != 0, CLDEMOTE) 1208 fs.setIf(ecx&(1<<23) != 0, KEYLOCKER) 1209 fs.setIf(ecx&(1<<27) != 0, MOVDIRI) 1210 fs.setIf(ecx&(1<<28) != 0, MOVDIR64B) 1211 fs.setIf(ecx&(1<<29) != 0, ENQCMD) 1212 fs.setIf(ecx&(1<<30) != 0, SGXLC) 1213 1214 // CPUID.(EAX=7, ECX=0).EDX 1215 fs.setIf(edx&(1<<4) != 0, FSRM) 1216 fs.setIf(edx&(1<<9) != 0, SRBDS_CTRL) 1217 fs.setIf(edx&(1<<10) != 0, MD_CLEAR) 1218 fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT) 1219 fs.setIf(edx&(1<<14) != 0, SERIALIZE) 1220 fs.setIf(edx&(1<<15) != 0, HYBRID_CPU) 1221 fs.setIf(edx&(1<<16) != 0, TSXLDTRK) 1222 fs.setIf(edx&(1<<18) != 0, PCONFIG) 1223 fs.setIf(edx&(1<<20) != 0, CETIBT) 1224 fs.setIf(edx&(1<<26) != 0, IBPB) 1225 fs.setIf(edx&(1<<27) != 0, STIBP) 1226 fs.setIf(edx&(1<<28) != 0, FLUSH_L1D) 1227 fs.setIf(edx&(1<<29) != 0, IA32_ARCH_CAP) 1228 fs.setIf(edx&(1<<30) != 0, IA32_CORE_CAP) 1229 fs.setIf(edx&(1<<31) != 0, SPEC_CTRL_SSBD) 1230 1231 // CPUID.(EAX=7, ECX=1).EAX 1232 eax1, _, _, edx1 := cpuidex(7, 1) 1233 fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI) 1234 fs.setIf(eax1&(1<<7) != 0, CMPCCXADD) 1235 fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL) 1236 fs.setIf(eax1&(1<<11) != 0, STOSB_SHORT) 1237 fs.setIf(eax1&(1<<12) != 0, CMPSB_SCADBS_SHORT) 1238 fs.setIf(eax1&(1<<22) != 0, HRESET) 1239 fs.setIf(eax1&(1<<23) != 0, AVXIFMA) 1240 fs.setIf(eax1&(1<<26) != 0, LAM) 1241 1242 // CPUID.(EAX=7, ECX=1).EDX 1243 fs.setIf(edx1&(1<<4) != 0, AVXVNNIINT8) 1244 fs.setIf(edx1&(1<<5) != 0, AVXNECONVERT) 1245 fs.setIf(edx1&(1<<14) != 0, PREFETCHI) 1246 fs.setIf(edx1&(1<<19) != 0, AVX10) 1247 fs.setIf(edx1&(1<<21) != 0, APX_F) 1248 1249 // Only detect AVX-512 features if XGETBV is supported 1250 if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) { 1251 // Check for OS support 1252 eax, _ := xgetbv(0) 1253 1254 // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and 1255 // ZMM16-ZMM31 state are enabled by OS) 1256 /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS). 
1257 hasAVX512 := (eax>>5)&7 == 7 && (eax>>1)&3 == 3 1258 if runtime.GOOS == "darwin" { 1259 hasAVX512 = fs.inSet(AVX) && darwinHasAVX512() 1260 } 1261 if hasAVX512 { 1262 fs.setIf(ebx&(1<<16) != 0, AVX512F) 1263 fs.setIf(ebx&(1<<17) != 0, AVX512DQ) 1264 fs.setIf(ebx&(1<<21) != 0, AVX512IFMA) 1265 fs.setIf(ebx&(1<<26) != 0, AVX512PF) 1266 fs.setIf(ebx&(1<<27) != 0, AVX512ER) 1267 fs.setIf(ebx&(1<<28) != 0, AVX512CD) 1268 fs.setIf(ebx&(1<<30) != 0, AVX512BW) 1269 fs.setIf(ebx&(1<<31) != 0, AVX512VL) 1270 // ecx 1271 fs.setIf(ecx&(1<<1) != 0, AVX512VBMI) 1272 fs.setIf(ecx&(1<<6) != 0, AVX512VBMI2) 1273 fs.setIf(ecx&(1<<11) != 0, AVX512VNNI) 1274 fs.setIf(ecx&(1<<12) != 0, AVX512BITALG) 1275 fs.setIf(ecx&(1<<14) != 0, AVX512VPOPCNTDQ) 1276 // edx 1277 fs.setIf(edx&(1<<8) != 0, AVX512VP2INTERSECT) 1278 fs.setIf(edx&(1<<22) != 0, AMXBF16) 1279 fs.setIf(edx&(1<<23) != 0, AVX512FP16) 1280 fs.setIf(edx&(1<<24) != 0, AMXTILE) 1281 fs.setIf(edx&(1<<25) != 0, AMXINT8) 1282 // eax1 = CPUID.(EAX=7, ECX=1).EAX 1283 fs.setIf(eax1&(1<<5) != 0, AVX512BF16) 1284 fs.setIf(eax1&(1<<19) != 0, WRMSRNS) 1285 fs.setIf(eax1&(1<<21) != 0, AMXFP16) 1286 fs.setIf(eax1&(1<<27) != 0, MSRLIST) 1287 } 1288 } 1289 1290 // CPUID.(EAX=7, ECX=2) 1291 _, _, _, edx = cpuidex(7, 2) 1292 fs.setIf(edx&(1<<0) != 0, PSFD) 1293 fs.setIf(edx&(1<<1) != 0, IDPRED_CTRL) 1294 fs.setIf(edx&(1<<2) != 0, RRSBA_CTRL) 1295 fs.setIf(edx&(1<<4) != 0, BHI_CTRL) 1296 fs.setIf(edx&(1<<5) != 0, MCDT_NO) 1297 1298 // Add keylocker features. 1299 if fs.inSet(KEYLOCKER) && mfi >= 0x19 { 1300 _, ebx, _, _ := cpuidex(0x19, 0) 1301 fs.setIf(ebx&5 == 5, KEYLOCKERW) // Bit 0 and 2 (1+4) 1302 } 1303 1304 // Add AVX10 features. 1305 if fs.inSet(AVX10) && mfi >= 0x24 { 1306 _, ebx, _, _ := cpuidex(0x24, 0) 1307 fs.setIf(ebx&(1<<16) != 0, AVX10_128) 1308 fs.setIf(ebx&(1<<17) != 0, AVX10_256) 1309 fs.setIf(ebx&(1<<18) != 0, AVX10_512) 1310 } 1311 } 1312 1313 // Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1) 1314 // EAX 1315 // Bit 00: XSAVEOPT is available. 1316 // Bit 01: Supports XSAVEC and the compacted form of XRSTOR if set. 1317 // Bit 02: Supports XGETBV with ECX = 1 if set. 1318 // Bit 03: Supports XSAVES/XRSTORS and IA32_XSS if set. 1319 // Bits 31 - 04: Reserved. 1320 // EBX 1321 // Bits 31 - 00: The size in bytes of the XSAVE area containing all states enabled by XCRO | IA32_XSS. 1322 // ECX 1323 // Bits 31 - 00: Reports the supported bits of the lower 32 bits of the IA32_XSS MSR. IA32_XSS[n] can be set to 1 only if ECX[n] is 1. 1324 // EDX? 1325 // Bits 07 - 00: Used for XCR0. Bit 08: PT state. Bit 09: Used for XCR0. Bits 12 - 10: Reserved. Bit 13: HWP state. Bits 31 - 14: Reserved. 
1326 if mfi >= 0xd { 1327 if fs.inSet(XSAVE) { 1328 eax, _, _, _ := cpuidex(0xd, 1) 1329 fs.setIf(eax&(1<<0) != 0, XSAVEOPT) 1330 fs.setIf(eax&(1<<1) != 0, XSAVEC) 1331 fs.setIf(eax&(1<<2) != 0, XGETBV1) 1332 fs.setIf(eax&(1<<3) != 0, XSAVES) 1333 } 1334 } 1335 if maxExtendedFunction() >= 0x80000001 { 1336 _, _, c, d := cpuid(0x80000001) 1337 if (c & (1 << 5)) != 0 { 1338 fs.set(LZCNT) 1339 fs.set(POPCNT) 1340 } 1341 // ECX 1342 fs.setIf((c&(1<<0)) != 0, LAHF) 1343 fs.setIf((c&(1<<2)) != 0, SVM) 1344 fs.setIf((c&(1<<6)) != 0, SSE4A) 1345 fs.setIf((c&(1<<10)) != 0, IBS) 1346 fs.setIf((c&(1<<22)) != 0, TOPEXT) 1347 1348 // EDX 1349 fs.setIf(d&(1<<11) != 0, SYSCALL) 1350 fs.setIf(d&(1<<20) != 0, NX) 1351 fs.setIf(d&(1<<22) != 0, MMXEXT) 1352 fs.setIf(d&(1<<23) != 0, MMX) 1353 fs.setIf(d&(1<<24) != 0, FXSR) 1354 fs.setIf(d&(1<<25) != 0, FXSROPT) 1355 fs.setIf(d&(1<<27) != 0, RDTSCP) 1356 fs.setIf(d&(1<<30) != 0, AMD3DNOWEXT) 1357 fs.setIf(d&(1<<31) != 0, AMD3DNOW) 1358 1359 /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be 1360 * used unless the OS has AVX support. */ 1361 if fs.inSet(AVX) { 1362 fs.setIf((c&(1<<11)) != 0, XOP) 1363 fs.setIf((c&(1<<16)) != 0, FMA4) 1364 } 1365 1366 } 1367 if maxExtendedFunction() >= 0x80000007 { 1368 _, b, _, d := cpuid(0x80000007) 1369 fs.setIf((b&(1<<0)) != 0, MCAOVERFLOW) 1370 fs.setIf((b&(1<<1)) != 0, SUCCOR) 1371 fs.setIf((b&(1<<2)) != 0, HWA) 1372 fs.setIf((d&(1<<9)) != 0, CPBOOST) 1373 } 1374 1375 if maxExtendedFunction() >= 0x80000008 { 1376 _, b, _, _ := cpuid(0x80000008) 1377 fs.setIf(b&(1<<28) != 0, PSFD) 1378 fs.setIf(b&(1<<27) != 0, CPPC) 1379 fs.setIf(b&(1<<24) != 0, SPEC_CTRL_SSBD) 1380 fs.setIf(b&(1<<23) != 0, PPIN) 1381 fs.setIf(b&(1<<21) != 0, TLB_FLUSH_NESTED) 1382 fs.setIf(b&(1<<20) != 0, EFER_LMSLE_UNS) 1383 fs.setIf(b&(1<<19) != 0, IBRS_PROVIDES_SMP) 1384 fs.setIf(b&(1<<18) != 0, IBRS_PREFERRED) 1385 fs.setIf(b&(1<<17) != 0, STIBP_ALWAYSON) 1386 fs.setIf(b&(1<<15) != 0, STIBP) 1387 fs.setIf(b&(1<<14) != 0, IBRS) 1388 fs.setIf((b&(1<<13)) != 0, INT_WBINVD) 1389 fs.setIf(b&(1<<12) != 0, IBPB) 1390 fs.setIf((b&(1<<9)) != 0, WBNOINVD) 1391 fs.setIf((b&(1<<8)) != 0, MCOMMIT) 1392 fs.setIf((b&(1<<4)) != 0, RDPRU) 1393 fs.setIf((b&(1<<3)) != 0, INVLPGB) 1394 fs.setIf((b&(1<<1)) != 0, MSRIRC) 1395 fs.setIf((b&(1<<0)) != 0, CLZERO) 1396 } 1397 1398 if fs.inSet(SVM) && maxExtendedFunction() >= 0x8000000A { 1399 _, _, _, edx := cpuid(0x8000000A) 1400 fs.setIf((edx>>0)&1 == 1, SVMNP) 1401 fs.setIf((edx>>1)&1 == 1, LBRVIRT) 1402 fs.setIf((edx>>2)&1 == 1, SVML) 1403 fs.setIf((edx>>3)&1 == 1, NRIPS) 1404 fs.setIf((edx>>4)&1 == 1, TSCRATEMSR) 1405 fs.setIf((edx>>5)&1 == 1, VMCBCLEAN) 1406 fs.setIf((edx>>6)&1 == 1, SVMFBASID) 1407 fs.setIf((edx>>7)&1 == 1, SVMDA) 1408 fs.setIf((edx>>10)&1 == 1, SVMPF) 1409 fs.setIf((edx>>12)&1 == 1, SVMPFT) 1410 } 1411 1412 if maxExtendedFunction() >= 0x8000001a { 1413 eax, _, _, _ := cpuid(0x8000001a) 1414 fs.setIf((eax>>0)&1 == 1, FP128) 1415 fs.setIf((eax>>1)&1 == 1, MOVU) 1416 fs.setIf((eax>>2)&1 == 1, FP256) 1417 } 1418 1419 if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) { 1420 eax, _, _, _ := cpuid(0x8000001b) 1421 fs.setIf((eax>>0)&1 == 1, IBSFFV) 1422 fs.setIf((eax>>1)&1 == 1, IBSFETCHSAM) 1423 fs.setIf((eax>>2)&1 == 1, IBSOPSAM) 1424 fs.setIf((eax>>3)&1 == 1, IBSRDWROPCNT) 1425 fs.setIf((eax>>4)&1 == 1, IBSOPCNT) 1426 fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT) 1427 fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT) 1428 fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK) 1429 fs.setIf((eax>>8)&1 == 1, 
IBS_OPFUSE) 1430 fs.setIf((eax>>9)&1 == 1, IBS_FETCH_CTLX) 1431 fs.setIf((eax>>10)&1 == 1, IBS_OPDATA4) // Doc says "Fixed,0. IBS op data 4 MSR supported", but assuming they mean 1. 1432 fs.setIf((eax>>11)&1 == 1, IBS_ZEN4) 1433 } 1434 1435 if maxExtendedFunction() >= 0x8000001f && vend == AMD { 1436 a, _, _, _ := cpuid(0x8000001f) 1437 fs.setIf((a>>0)&1 == 1, SME) 1438 fs.setIf((a>>1)&1 == 1, SEV) 1439 fs.setIf((a>>2)&1 == 1, MSR_PAGEFLUSH) 1440 fs.setIf((a>>3)&1 == 1, SEV_ES) 1441 fs.setIf((a>>4)&1 == 1, SEV_SNP) 1442 fs.setIf((a>>5)&1 == 1, VMPL) 1443 fs.setIf((a>>10)&1 == 1, SME_COHERENT) 1444 fs.setIf((a>>11)&1 == 1, SEV_64BIT) 1445 fs.setIf((a>>12)&1 == 1, SEV_RESTRICTED) 1446 fs.setIf((a>>13)&1 == 1, SEV_ALTERNATIVE) 1447 fs.setIf((a>>14)&1 == 1, SEV_DEBUGSWAP) 1448 fs.setIf((a>>15)&1 == 1, IBS_PREVENTHOST) 1449 fs.setIf((a>>16)&1 == 1, VTE) 1450 fs.setIf((a>>24)&1 == 1, VMSA_REGPROT) 1451 } 1452 1453 if maxExtendedFunction() >= 0x80000021 && vend == AMD { 1454 a, _, _, _ := cpuid(0x80000021) 1455 fs.setIf((a>>31)&1 == 1, SRSO_MSR_FIX) 1456 fs.setIf((a>>30)&1 == 1, SRSO_USER_KERNEL_NO) 1457 fs.setIf((a>>29)&1 == 1, SRSO_NO) 1458 fs.setIf((a>>28)&1 == 1, IBPB_BRTYPE) 1459 fs.setIf((a>>27)&1 == 1, SBPB) 1460 } 1461 1462 if mfi >= 0x20 { 1463 // Microsoft has decided to purposefully hide the information 1464 // of the guest TEE when VMs are being created using Hyper-V. 1465 // 1466 // This leads us to check for the Hyper-V cpuid features 1467 // (0x4000000C), and then for the `ebx` value set. 1468 // 1469 // For Intel TDX, `ebx` is set as `0xbe3`, being 3 the part 1470 // we're mostly interested about,according to: 1471 // https://github.com/torvalds/linux/blob/d2f51b3516dade79269ff45eae2a7668ae711b25/arch/x86/include/asm/hyperv-tlfs.h#L169-L174 1472 _, ebx, _, _ := cpuid(0x4000000C) 1473 fs.setIf(ebx == 0xbe3, TDX_GUEST) 1474 } 1475 1476 if mfi >= 0x21 { 1477 // Intel Trusted Domain Extensions Guests have their own cpuid leaf (0x21). 1478 _, ebx, ecx, edx := cpuid(0x21) 1479 identity := string(valAsString(ebx, edx, ecx)) 1480 fs.setIf(identity == "IntelTDX ", TDX_GUEST) 1481 } 1482 1483 return fs 1484 } 1485 1486 func (c *CPUInfo) supportAVX10() uint8 { 1487 if c.maxFunc >= 0x24 && c.featureSet.inSet(AVX10) { 1488 _, ebx, _, _ := cpuidex(0x24, 0) 1489 return uint8(ebx) 1490 } 1491 return 0 1492 } 1493 1494 func valAsString(values ...uint32) []byte { 1495 r := make([]byte, 4*len(values)) 1496 for i, v := range values { 1497 dst := r[i*4:] 1498 dst[0] = byte(v & 0xff) 1499 dst[1] = byte((v >> 8) & 0xff) 1500 dst[2] = byte((v >> 16) & 0xff) 1501 dst[3] = byte((v >> 24) & 0xff) 1502 switch { 1503 case dst[0] == 0: 1504 return r[:i*4] 1505 case dst[1] == 0: 1506 return r[:i*4+1] 1507 case dst[2] == 0: 1508 return r[:i*4+2] 1509 case dst[3] == 0: 1510 return r[:i*4+3] 1511 } 1512 } 1513 return r 1514 }
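// Example usage (illustrative sketch, not part of the upstream file): a minimal
// consumer program, in its own package, exercising only the exported API defined
// above (the shared CPU variable, Supports, AnyOf, CombineFeatures, HasAll,
// X64Level and FeatureSet). The CPU variable is populated by Detect() during init().
//
//	package main
//
//	import (
//		"fmt"
//
//		"github.com/klauspost/cpuid/v2"
//	)
//
//	func main() {
//		// Query individual features on the package-level CPU variable.
//		fmt.Println("AES-NI:", cpuid.CPU.Supports(cpuid.AESNI))
//		fmt.Println("AVX2 or AVX-512F:", cpuid.CPU.AnyOf(cpuid.AVX2, cpuid.AVX512F))
//
//		// Combine several features once, then test them together with HasAll
//		// for a near constant-time check.
//		want := cpuid.CombineFeatures(cpuid.SSE42, cpuid.POPCNT, cpuid.CLMUL)
//		fmt.Println("SSE4.2+POPCNT+CLMUL:", cpuid.CPU.HasAll(want))
//
//		// Microarchitecture level; 0 means below x86-64-v1 or not running in x64 mode.
//		fmt.Println("x86-64 level:", cpuid.CPU.X64Level())
//
//		// All detected feature names as strings.
//		fmt.Println("features:", cpuid.CPU.FeatureSet())
//	}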