git.sr.ht/~pingoo/stdx@v0.0.0-20240218134121-094174641f6e/cpuinfo/cpuinfo.go (about)

     1  // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
     2  
// Package cpuinfo provides information about the CPU running the current program.
//
// CPU features are detected on startup, and kept for fast access through the life of the application.
// Currently x86 / x64 (AMD64) as well as arm64 are supported.
//
// You can access the CPU information through the exported CPU variable of this package.
//
// Package home: https://github.com/klauspost/cpuid
    11  package cpuinfo
    12  
    13  import (
    14  	"flag"
    15  	"fmt"
    16  	"math"
    17  	"math/bits"
    18  	"os"
    19  	"runtime"
    20  	"strings"
    21  )
    22  
// AMD reference: https://www.amd.com/system/files/TechDocs/25481.pdf
// and Processor Programming Reference (PPR)
    25  
// Vendor is a representation of a CPU vendor.
type Vendor int

// Known vendor identifiers. VendorUnknown is the zero value of Vendor; the
// remaining identifiers are numbered consecutively by iota in declaration
// order.
const (
	VendorUnknown Vendor = iota
	Intel
	AMD
	VIA
	Transmeta
	NSC
	KVM  // Kernel-based Virtual Machine
	MSVM // Microsoft Hyper-V or Windows Virtual PC
	VMware
	XenHVM
	Bhyve
	Hygon
	SiS
	RDC

	Ampere
	ARM
	Broadcom
	Cavium
	DEC
	Fujitsu
	Infineon
	Motorola
	NVIDIA
	AMCC
	Qualcomm
	Marvell

	// lastVendor follows the final vendor; presumably an exclusive upper
	// bound for the list, so keep it last.
	lastVendor
)
    60  
    61  //go:generate stringer -type=FeatureID,Vendor
    62  
// FeatureID is the ID of a specific cpu feature.
type FeatureID int

// Feature identifiers. The values double as bit positions inside flagSet,
// so the order of this list determines where each feature is stored.
const (
	// Keep index -1 as unknown
	UNKNOWN = -1

	// Add features.
	// UNKNOWN occupies the first spec of this declaration, so iota is 1 on
	// the ADX line: ADX == 1 and the value 0 is only used by firstID below.
	ADX                FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	AESNI                               // Advanced Encryption Standard New Instructions
	AMD3DNOW                            // AMD 3DNOW
	AMD3DNOWEXT                         // AMD 3DNowExt
	AMXBF16                             // Tile computational operations on BFLOAT16 numbers
	AMXFP16                             // Tile computational operations on FP16 numbers
	AMXINT8                             // Tile computational operations on 8-bit integers
	AMXTILE                             // Tile architecture
	AVX                                 // AVX functions
	AVX2                                // AVX2 functions
	AVX512BF16                          // AVX-512 BFLOAT16 Instructions
	AVX512BITALG                        // AVX-512 Bit Algorithms
	AVX512BW                            // AVX-512 Byte and Word Instructions
	AVX512CD                            // AVX-512 Conflict Detection Instructions
	AVX512DQ                            // AVX-512 Doubleword and Quadword Instructions
	AVX512ER                            // AVX-512 Exponential and Reciprocal Instructions
	AVX512F                             // AVX-512 Foundation
	AVX512FP16                          // AVX-512 FP16 Instructions
	AVX512IFMA                          // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF                            // AVX-512 Prefetch Instructions
	AVX512VBMI                          // AVX-512 Vector Bit Manipulation Instructions
	AVX512VBMI2                         // AVX-512 Vector Bit Manipulation Instructions, Version 2
	AVX512VL                            // AVX-512 Vector Length Extensions
	AVX512VNNI                          // AVX-512 Vector Neural Network Instructions
	AVX512VP2INTERSECT                  // AVX-512 Intersect for D/Q
	AVX512VPOPCNTDQ                     // AVX-512 Vector Population Count Doubleword and Quadword
	AVXIFMA                             // AVX-IFMA instructions
	AVXNECONVERT                        // AVX-NE-CONVERT instructions
	AVXSLOW                             // Indicates the CPU performs 2 128 bit operations instead of one
	AVXVNNI                             // AVX (VEX encoded) VNNI neural network instructions
	AVXVNNIINT8                         // AVX-VNNI-INT8 instructions
	BHI_CTRL                            // Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598
	BMI1                                // Bit Manipulation Instruction Set 1
	BMI2                                // Bit Manipulation Instruction Set 2
	CETIBT                              // Intel CET Indirect Branch Tracking
	CETSS                               // Intel CET Shadow Stack
	CLDEMOTE                            // Cache Line Demote
	CLMUL                               // Carry-less Multiplication
	CLZERO                              // CLZERO instruction supported
	CMOV                                // i686 CMOV
	CMPCCXADD                           // CMPCCXADD instructions
	CMPSB_SCADBS_SHORT                  // Fast short CMPSB and SCASB
	CMPXCHG8                            // CMPXCHG8 instruction
	CPBOOST                             // Core Performance Boost
	CPPC                                // AMD: Collaborative Processor Performance Control
	CX16                                // CMPXCHG16B Instruction
	EFER_LMSLE_UNS                      // AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ
	ENQCMD                              // Enqueue Command
	ERMS                                // Enhanced REP MOVSB/STOSB
	F16C                                // Half-precision floating-point conversion
	FLUSH_L1D                           // Flush L1D cache
	FMA3                                // Intel FMA 3. Does not imply AVX.
	FMA4                                // Bulldozer FMA4 functions
	FP128                               // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide
	FP256                               // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide
	FSRM                                // Fast Short Rep Mov
	FXSR                                // FXSAVE, FXRESTOR instructions, CR4 bit 9
	FXSROPT                             // FXSAVE/FXRSTOR optimizations
	GFNI                                // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.
	HLE                                 // Hardware Lock Elision
	HRESET                              // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR
	HTT                                 // Hyperthreading (enabled)
	HWA                                 // Hardware assert supported. Indicates support for MSRC001_10
	HYBRID_CPU                          // This part has CPUs of more than one type.
	HYPERVISOR                          // This bit has been reserved by Intel & AMD for use by hypervisors
	IA32_ARCH_CAP                       // IA32_ARCH_CAPABILITIES MSR (Intel)
	IA32_CORE_CAP                       // IA32_CORE_CAPABILITIES MSR
	IBPB                                // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
	IBRS                                // AMD: Indirect Branch Restricted Speculation
	IBRS_PREFERRED                      // AMD: IBRS is preferred over software solution
	IBRS_PROVIDES_SMP                   // AMD: IBRS provides Same Mode Protection
	IBS                                 // Instruction Based Sampling (AMD)
	IBSBRNTRGT                          // Instruction Based Sampling Feature (AMD)
	IBSFETCHSAM                         // Instruction Based Sampling Feature (AMD)
	IBSFFV                              // Instruction Based Sampling Feature (AMD)
	IBSOPCNT                            // Instruction Based Sampling Feature (AMD)
	IBSOPCNTEXT                         // Instruction Based Sampling Feature (AMD)
	IBSOPSAM                            // Instruction Based Sampling Feature (AMD)
	IBSRDWROPCNT                        // Instruction Based Sampling Feature (AMD)
	IBSRIPINVALIDCHK                    // Instruction Based Sampling Feature (AMD)
	IBS_FETCH_CTLX                      // AMD: IBS fetch control extended MSR supported
	IBS_OPDATA4                         // AMD: IBS op data 4 MSR supported
	IBS_OPFUSE                          // AMD: Indicates support for IbsOpFuse
	IBS_PREVENTHOST                     // Disallowing IBS use by the host supported
	IBS_ZEN4                            // AMD: Fetch and Op IBS support IBS extensions added with Zen4
	IDPRED_CTRL                         // IPRED_DIS
	INT_WBINVD                          // WBINVD/WBNOINVD are interruptible.
	INVLPGB                             // INVLPGB and TLBSYNC instructions supported
	LAHF                                // LAHF/SAHF in long mode
	LAM                                 // If set, CPU supports Linear Address Masking
	LBRVIRT                             // LBR virtualization
	LZCNT                               // LZCNT instruction
	MCAOVERFLOW                         // MCA overflow recovery support.
	MCDT_NO                             // Processor does not exhibit MXCSR Configuration Dependent Timing behavior and does not need to mitigate it.
	MCOMMIT                             // MCOMMIT instruction supported
	MD_CLEAR                            // VERW clears CPU buffers
	MMX                                 // standard MMX
	MMXEXT                              // SSE integer functions or AMD MMX ext
	MOVBE                               // MOVBE instruction (big-endian)
	MOVDIR64B                           // Move 64 Bytes as Direct Store
	MOVDIRI                             // Move Doubleword as Direct Store
	MOVSB_ZL                            // Fast Zero-Length MOVSB
	MOVU                                // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD
	MPX                                 // Intel MPX (Memory Protection Extensions)
	MSRIRC                              // Instruction Retired Counter MSR available
	MSRLIST                             // Read/Write List of Model Specific Registers
	MSR_PAGEFLUSH                       // Page Flush MSR available
	NRIPS                               // Indicates support for NRIP save on VMEXIT
	NX                                  // NX (No-Execute) bit
	OSXSAVE                             // XSAVE enabled by OS
	PCONFIG                             // PCONFIG for Intel Multi-Key Total Memory Encryption
	POPCNT                              // POPCNT instruction
	PPIN                                // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled
	PREFETCHI                           // PREFETCHIT0/1 instructions
	PSFD                                // Predictive Store Forward Disable
	RDPRU                               // RDPRU instruction supported
	RDRAND                              // RDRAND instruction is available
	RDSEED                              // RDSEED instruction is available
	RDTSCP                              // RDTSCP Instruction
	RRSBA_CTRL                          // Restricted RSB Alternate
	RTM                                 // Restricted Transactional Memory
	RTM_ALWAYS_ABORT                    // Indicates that the loaded microcode is forcing RTM abort.
	SERIALIZE                           // Serialize Instruction Execution
	SEV                                 // AMD Secure Encrypted Virtualization supported
	SEV_64BIT                           // AMD SEV guest execution only allowed from a 64-bit host
	SEV_ALTERNATIVE                     // AMD SEV Alternate Injection supported
	SEV_DEBUGSWAP                       // Full debug state swap supported for SEV-ES guests
	SEV_ES                              // AMD SEV Encrypted State supported
	SEV_RESTRICTED                      // AMD SEV Restricted Injection supported
	SEV_SNP                             // AMD SEV Secure Nested Paging supported
	SGX                                 // Software Guard Extensions
	SGXLC                               // Software Guard Extensions Launch Control
	SHA                                 // Intel SHA Extensions
	SME                                 // AMD Secure Memory Encryption supported
	SME_COHERENT                        // AMD Hardware cache coherency across encryption domains enforced
	SPEC_CTRL_SSBD                      // Speculative Store Bypass Disable
	SRBDS_CTRL                          // SRBDS mitigation MSR available
	SSE                                 // SSE functions
	SSE2                                // P4 SSE functions
	SSE3                                // Prescott SSE3 functions
	SSE4                                // Penryn SSE4.1 functions
	SSE42                               // Nehalem SSE4.2 functions
	SSE4A                               // AMD Barcelona microarchitecture SSE4a instructions
	SSSE3                               // Conroe SSSE3 functions
	STIBP                               // Single Thread Indirect Branch Predictors
	STIBP_ALWAYSON                      // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On
	STOSB_SHORT                         // Fast short STOSB
	SUCCOR                              // Software uncorrectable error containment and recovery capability.
	SVM                                 // AMD Secure Virtual Machine
	SVMDA                               // Indicates support for the SVM decode assists.
	SVMFBASID                           // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control
	SVML                                // AMD SVM lock. Indicates support for SVM-Lock.
	SVMNP                               // AMD SVM nested paging
	SVMPF                               // SVM pause intercept filter. Indicates support for the pause intercept filter
	SVMPFT                              // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold
	SYSCALL                             // System-Call Extension (SCE): SYSCALL and SYSRET instructions.
	SYSEE                               // SYSENTER and SYSEXIT instructions
	TBM                                 // AMD Trailing Bit Manipulation
	TLB_FLUSH_NESTED                    // AMD: Flushing includes all the nested translations for guest translations
	TME                                 // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.
	TOPEXT                              // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
	TSCRATEMSR                          // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104
	TSXLDTRK                            // Intel TSX Suspend Load Address Tracking
	VAES                                // Vector AES. AVX(512) versions requires additional checks.
	VMCBCLEAN                           // VMCB clean bits. Indicates support for VMCB clean bits.
	VMPL                                // AMD VM Permission Levels supported
	VMSA_REGPROT                        // AMD VMSA Register Protection supported
	VMX                                 // Virtual Machine Extensions
	VPCLMULQDQ                          // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions.
	VTE                                 // AMD Virtual Transparent Encryption supported
	WAITPKG                             // TPAUSE, UMONITOR, UMWAIT
	WBNOINVD                            // Write Back and Do Not Invalidate Cache
	WRMSRNS                             // Non-Serializing Write to Model Specific Register
	X87                                 // FPU
	XGETBV1                             // Supports XGETBV with ECX = 1
	XOP                                 // Bulldozer XOP functions
	XSAVE                               // XSAVE, XRESTOR, XSETBV, XGETBV
	XSAVEC                              // Supports XSAVEC and the compacted form of XRSTOR.
	XSAVEOPT                            // XSAVEOPT available
	XSAVES                              // Supports XSAVES/XRSTORS and IA32_XSS

	// ARM features:
	AESARM   // AES instructions
	ARMCPUID // Some CPU ID registers readable at user-level
	ASIMD    // Advanced SIMD
	ASIMDDP  // SIMD Dot Product
	ASIMDHP  // Advanced SIMD half-precision floating point
	ASIMDRDM // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
	ATOMICS  // Large System Extensions (LSE)
	CRC32    // CRC32/CRC32C instructions
	DCPOP    // Data cache clean to Point of Persistence (DC CVAP)
	EVTSTRM  // Generic timer
	FCMA     // Floating point complex number addition and multiplication
	FP       // Single-precision and double-precision floating point
	FPHP     // Half-precision floating point
	GPA      // Generic Pointer Authentication
	JSCVT    // Javascript-style double->int convert (FJCVTZS)
	LRCPC    // Weaker release consistency (LDAPR, etc)
	PMULL    // Polynomial Multiply instructions (PMULL/PMULL2)
	SHA1     // SHA-1 instructions (SHA1C, etc)
	SHA2     // SHA-2 instructions (SHA256H, etc)
	SHA3     // SHA-3 instructions (EOR3, RAXI, XAR, BCAX)
	SHA512   // SHA512 instructions
	SM3      // SM3 instructions
	SM4      // SM4 instructions
	SVE      // Scalable Vector Extension
	// Keep it last. It automatically defines the size of the flagSet array.
	lastID

	// firstID is the value directly after UNKNOWN, i.e. 0.
	firstID FeatureID = UNKNOWN + 1
)
   282  
// CPUInfo contains information about the detected system CPU.
//
// Feature flags are kept in the unexported featureSet field and are queried
// through methods such as Has, Supports and AnyOf.
type CPUInfo struct {
	BrandName      string  // Brand name reported by the CPU
	VendorID       Vendor  // Comparable CPU vendor ID
	VendorString   string  // Raw vendor string.
	featureSet     flagSet // Features of the CPU
	PhysicalCores  int     // Number of physical processor cores in your CPU. Will be 0 if undetectable.
	ThreadsPerCore int     // Number of threads per physical core. Will be 1 if undetectable.
	LogicalCores   int     // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
	Family         int     // CPU family number
	Model          int     // CPU model number
	Stepping       int     // CPU stepping info
	CacheLine      int     // Cache line size in bytes. Will be 0 if undetectable.
	Hz             int64   // Clock speed, if known, 0 otherwise. Will attempt to contain base clock speed.
	BoostFreq      int64   // Max clock speed, if known, 0 otherwise
	Cache          struct {
		L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
		L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
		L2  int // L2 Cache (per core or shared). Will be -1 if undetected
		L3  int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected
	}
	SGX       SGXSupport // SGX details; see the SGXSupport type
	maxFunc   uint32     // checked by LogicalCPU before querying leaf 1; presumably the highest standard CPUID leaf - confirm
	maxExFunc uint32     // presumably the highest extended CPUID leaf - confirm
}
   308  
// cpuid is the hook used to query a CPUID leaf. It has no value at
// declaration; presumably initCPU installs the platform implementation - confirm.
var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)

// cpuidex is the hook for CPUID queries that take a second operand (op2).
var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)

// xgetbv is the hook for the XGETBV query with the given index.
var xgetbv func(index uint32) (eax, edx uint32)

// rdtscpAsm is the hook behind RTCounter and Ia32TscAux.
var rdtscpAsm func() (eax, ebx, ecx, edx uint32)

// darwinHasAVX512 reports false by default; presumably replaced on darwin builds - confirm.
var darwinHasAVX512 = func() bool { return false }

// CPU contains information about the CPU as detected on startup,
// or when Detect last was called.
//
// Use this as the primary entry point to your data.
var CPU CPUInfo
   320  
// init prepares the package on load: initCPU runs first, then Detect
// populates the exported CPU variable.
func init() {
	initCPU()
	Detect()
}
   325  
   326  // Detect will re-detect current CPU info.
   327  // This will replace the content of the exported CPU variable.
   328  //
   329  // Unless you expect the CPU to change while you are running your program
   330  // you should not need to call this function.
   331  // If you call this, you must ensure that no other goroutine is accessing the
   332  // exported CPU variable.
   333  func Detect() {
   334  	// Set defaults
   335  	CPU.ThreadsPerCore = 1
   336  	CPU.Cache.L1I = -1
   337  	CPU.Cache.L1D = -1
   338  	CPU.Cache.L2 = -1
   339  	CPU.Cache.L3 = -1
   340  	safe := true
   341  	if detectArmFlag != nil {
   342  		safe = !*detectArmFlag
   343  	}
   344  	addInfo(&CPU, safe)
   345  	if displayFeats != nil && *displayFeats {
   346  		fmt.Println("cpu features:", strings.Join(CPU.FeatureSet(), ","))
   347  		// Exit with non-zero so tests will print value.
   348  		os.Exit(1)
   349  	}
   350  	if disableFlag != nil {
   351  		s := strings.Split(*disableFlag, ",")
   352  		for _, feat := range s {
   353  			feat := ParseFeature(strings.TrimSpace(feat))
   354  			if feat != UNKNOWN {
   355  				CPU.featureSet.unset(feat)
   356  			}
   357  		}
   358  	}
   359  }
   360  
// DetectARM will detect ARM64 features.
// This is NOT done automatically since it can potentially crash
// if the OS does not handle the command.
// If in the future this can be done safely this function may not
// do anything.
//
// It re-runs addInfo on the exported CPU variable with the safe argument
// set to false.
func DetectARM() {
	addInfo(&CPU, false)
}
   369  
// Command line flag values. They stay nil until Flags is called, and Detect
// treats nil as "flag not registered".
var detectArmFlag *bool
var displayFeats *bool
var disableFlag *string
   373  
// Flags registers the package's command line flags ("cpu.disable",
// "cpu.features" and "cpu.arm") with the standard flag package.
// This must be called *before* flag.Parse AND
// Detect must be called after the flags have been parsed.
// Note that this means that any detection used in init() functions
// will not contain these flags.
func Flags() {
	disableFlag = flag.String("cpu.disable", "", "disable cpu features; comma separated list")
	displayFeats = flag.Bool("cpu.features", false, "lists cpu features and exits")
	detectArmFlag = flag.Bool("cpu.arm", false, "allow ARM features to be detected; can potentially crash")
}
   384  
   385  // Supports returns whether the CPU supports all of the requested features.
   386  func (c CPUInfo) Supports(ids ...FeatureID) bool {
   387  	for _, id := range ids {
   388  		if !c.featureSet.inSet(id) {
   389  			return false
   390  		}
   391  	}
   392  	return true
   393  }
   394  
// Has reports whether the single feature id is present.
// Should be inlined by the compiler.
func (c *CPUInfo) Has(id FeatureID) bool {
	return c.featureSet.inSet(id)
}
   400  
   401  // AnyOf returns whether the CPU supports one or more of the requested features.
   402  func (c CPUInfo) AnyOf(ids ...FeatureID) bool {
   403  	for _, id := range ids {
   404  		if c.featureSet.inSet(id) {
   405  			return true
   406  		}
   407  	}
   408  	return false
   409  }
   410  
// Features contains several features combined for a fast check using
// CPUInfo.HasAll. Values are created with CombineFeatures.
type Features *flagSet
   414  
   415  // CombineFeatures allows to combine several features for a close to constant time lookup.
   416  func CombineFeatures(ids ...FeatureID) Features {
   417  	var v flagSet
   418  	for _, id := range ids {
   419  		v.set(id)
   420  	}
   421  	return &v
   422  }
   423  
// HasAll reports whether every feature contained in f is present.
// f is normally built once with CombineFeatures and must not be nil.
func (c *CPUInfo) HasAll(f Features) bool {
	return c.featureSet.hasSetP(f)
}
   427  
// Feature sets used by X64Level, see
// https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
//
// oneOfLevel is the precondition X64Level checks first: at least one of
// these features must be present for any level to be reported.
var oneOfLevel = CombineFeatures(SYSEE, SYSCALL)

// level1Features through level4Features each list the complete set of
// features X64Level requires for that level; every level repeats the
// features of the level below it.
var level1Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2)
var level2Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3)
var level3Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE)
var level4Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL)
   434  
   435  // X64Level returns the microarchitecture level detected on the CPU.
   436  // If features are lacking or non x64 mode, 0 is returned.
   437  // See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
   438  func (c CPUInfo) X64Level() int {
   439  	if !c.featureSet.hasOneOf(oneOfLevel) {
   440  		return 0
   441  	}
   442  	if c.featureSet.hasSetP(level4Features) {
   443  		return 4
   444  	}
   445  	if c.featureSet.hasSetP(level3Features) {
   446  		return 3
   447  	}
   448  	if c.featureSet.hasSetP(level2Features) {
   449  		return 2
   450  	}
   451  	if c.featureSet.hasSetP(level1Features) {
   452  		return 1
   453  	}
   454  	return 0
   455  }
   456  
   457  // Disable will disable one or several features.
   458  func (c *CPUInfo) Disable(ids ...FeatureID) bool {
   459  	for _, id := range ids {
   460  		c.featureSet.unset(id)
   461  	}
   462  	return true
   463  }
   464  
   465  // Enable will disable one or several features even if they were undetected.
   466  // This is of course not recommended for obvious reasons.
   467  func (c *CPUInfo) Enable(ids ...FeatureID) bool {
   468  	for _, id := range ids {
   469  		c.featureSet.set(id)
   470  	}
   471  	return true
   472  }
   473  
// IsVendor returns true if the detected vendor ID equals v.
func (c CPUInfo) IsVendor(v Vendor) bool {
	return c.VendorID == v
}
   478  
   479  // FeatureSet returns all available features as strings.
   480  func (c CPUInfo) FeatureSet() []string {
   481  	s := make([]string, 0, c.featureSet.nEnabled())
   482  	s = append(s, c.featureSet.Strings()...)
   483  	return s
   484  }
   485  
   486  // RTCounter returns the 64-bit time-stamp counter
   487  // Uses the RDTSCP instruction. The value 0 is returned
   488  // if the CPU does not support the instruction.
   489  func (c CPUInfo) RTCounter() uint64 {
   490  	if !c.Supports(RDTSCP) {
   491  		return 0
   492  	}
   493  	a, _, _, d := rdtscpAsm()
   494  	return uint64(a) | (uint64(d) << 32)
   495  }
   496  
   497  // Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
   498  // This variable is OS dependent, but on Linux contains information
   499  // about the current cpu/core the code is running on.
   500  // If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
   501  func (c CPUInfo) Ia32TscAux() uint32 {
   502  	if !c.Supports(RDTSCP) {
   503  		return 0
   504  	}
   505  	_, _, ecx, _ := rdtscpAsm()
   506  	return ecx
   507  }
   508  
   509  // LogicalCPU will return the Logical CPU the code is currently executing on.
   510  // This is likely to change when the OS re-schedules the running thread
   511  // to another CPU.
   512  // If the current core cannot be detected, -1 will be returned.
   513  func (c CPUInfo) LogicalCPU() int {
   514  	if c.maxFunc < 1 {
   515  		return -1
   516  	}
   517  	_, ebx, _, _ := cpuid(1)
   518  	return int(ebx >> 24)
   519  }
   520  
   521  // frequencies tries to compute the clock speed of the CPU. If leaf 15 is
   522  // supported, use it, otherwise parse the brand string. Yes, really.
   523  func (c *CPUInfo) frequencies() {
   524  	c.Hz, c.BoostFreq = 0, 0
   525  	mfi := maxFunctionID()
   526  	if mfi >= 0x15 {
   527  		eax, ebx, ecx, _ := cpuid(0x15)
   528  		if eax != 0 && ebx != 0 && ecx != 0 {
   529  			c.Hz = (int64(ecx) * int64(ebx)) / int64(eax)
   530  		}
   531  	}
   532  	if mfi >= 0x16 {
   533  		a, b, _, _ := cpuid(0x16)
   534  		// Base...
   535  		if a&0xffff > 0 {
   536  			c.Hz = int64(a&0xffff) * 1_000_000
   537  		}
   538  		// Boost...
   539  		if b&0xffff > 0 {
   540  			c.BoostFreq = int64(b&0xffff) * 1_000_000
   541  		}
   542  	}
   543  	if c.Hz > 0 {
   544  		return
   545  	}
   546  
   547  	// computeHz determines the official rated speed of a CPU from its brand
   548  	// string. This insanity is *actually the official documented way to do
   549  	// this according to Intel*, prior to leaf 0x15 existing. The official
   550  	// documentation only shows this working for exactly `x.xx` or `xxxx`
   551  	// cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other
   552  	// sizes.
   553  	model := c.BrandName
   554  	hz := strings.LastIndex(model, "Hz")
   555  	if hz < 3 {
   556  		return
   557  	}
   558  	var multiplier int64
   559  	switch model[hz-1] {
   560  	case 'M':
   561  		multiplier = 1000 * 1000
   562  	case 'G':
   563  		multiplier = 1000 * 1000 * 1000
   564  	case 'T':
   565  		multiplier = 1000 * 1000 * 1000 * 1000
   566  	}
   567  	if multiplier == 0 {
   568  		return
   569  	}
   570  	freq := int64(0)
   571  	divisor := int64(0)
   572  	decimalShift := int64(1)
   573  	var i int
   574  	for i = hz - 2; i >= 0 && model[i] != ' '; i-- {
   575  		if model[i] >= '0' && model[i] <= '9' {
   576  			freq += int64(model[i]-'0') * decimalShift
   577  			decimalShift *= 10
   578  		} else if model[i] == '.' {
   579  			if divisor != 0 {
   580  				return
   581  			}
   582  			divisor = decimalShift
   583  		} else {
   584  			return
   585  		}
   586  	}
   587  	// we didn't find a space
   588  	if i < 0 {
   589  		return
   590  	}
   591  	if divisor != 0 {
   592  		c.Hz = (freq * multiplier) / divisor
   593  		return
   594  	}
   595  	c.Hz = freq * multiplier
   596  }
   597  
   598  // VM Will return true if the cpu id indicates we are in
   599  // a virtual machine.
   600  func (c CPUInfo) VM() bool {
   601  	return CPU.featureSet.inSet(HYPERVISOR)
   602  }
   603  
// flags contains detected cpu features and characteristics.
// Each value is one 64-bit word of a flagSet.
type flags uint64

// log2(bits_in_uint64)
const flagBitsLog2 = 6

// flagBits is the number of feature bits stored per flags word (64).
const flagBits = 1 << flagBitsLog2

// flagMask extracts the bit position inside a word from a FeatureID.
const flagMask = flagBits - 1

// flagSet contains detected cpu features and characteristics in an array of flags.
// Feature id lives in word id>>flagBitsLog2 at bit id&flagMask; the array
// length is derived from lastID.
type flagSet [(lastID + flagMask) / flagBits]flags
   614  
   615  func (s *flagSet) inSet(feat FeatureID) bool {
   616  	return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0
   617  }
   618  
   619  func (s *flagSet) set(feat FeatureID) {
   620  	s[feat>>flagBitsLog2] |= 1 << (feat & flagMask)
   621  }
   622  
   623  // setIf will set a feature if boolean is true.
   624  func (s *flagSet) setIf(cond bool, features ...FeatureID) {
   625  	if cond {
   626  		for _, offset := range features {
   627  			s[offset>>flagBitsLog2] |= 1 << (offset & flagMask)
   628  		}
   629  	}
   630  }
   631  
   632  func (s *flagSet) unset(offset FeatureID) {
   633  	bit := flags(1 << (offset & flagMask))
   634  	s[offset>>flagBitsLog2] = s[offset>>flagBitsLog2] & ^bit
   635  }
   636  
   637  // or with another flagset.
   638  func (s *flagSet) or(other flagSet) {
   639  	for i, v := range other[:] {
   640  		s[i] |= v
   641  	}
   642  }
   643  
   644  // hasSet returns whether all features are present.
   645  func (s *flagSet) hasSet(other flagSet) bool {
   646  	for i, v := range other[:] {
   647  		if s[i]&v != v {
   648  			return false
   649  		}
   650  	}
   651  	return true
   652  }
   653  
   654  // hasSet returns whether all features are present.
   655  func (s *flagSet) hasSetP(other *flagSet) bool {
   656  	for i, v := range other[:] {
   657  		if s[i]&v != v {
   658  			return false
   659  		}
   660  	}
   661  	return true
   662  }
   663  
   664  // hasOneOf returns whether one or more features are present.
   665  func (s *flagSet) hasOneOf(other *flagSet) bool {
   666  	for i, v := range other[:] {
   667  		if s[i]&v != 0 {
   668  			return true
   669  		}
   670  	}
   671  	return false
   672  }
   673  
   674  // nEnabled will return the number of enabled flags.
   675  func (s *flagSet) nEnabled() (n int) {
   676  	for _, v := range s[:] {
   677  		n += bits.OnesCount64(uint64(v))
   678  	}
   679  	return n
   680  }
   681  
   682  func flagSetWith(feat ...FeatureID) flagSet {
   683  	var res flagSet
   684  	for _, f := range feat {
   685  		res.set(f)
   686  	}
   687  	return res
   688  }
   689  
   690  // ParseFeature will parse the string and return the ID of the matching feature.
   691  // Will return UNKNOWN if not found.
   692  func ParseFeature(s string) FeatureID {
   693  	s = strings.ToUpper(s)
   694  	for i := firstID; i < lastID; i++ {
   695  		if i.String() == s {
   696  			return i
   697  		}
   698  	}
   699  	return UNKNOWN
   700  }
   701  
   702  // Strings returns an array of the detected features for FlagsSet.
   703  func (s flagSet) Strings() []string {
   704  	if len(s) == 0 {
   705  		return []string{""}
   706  	}
   707  	r := make([]string, 0)
   708  	for i := firstID; i < lastID; i++ {
   709  		if s.inSet(i) {
   710  			r = append(r, i.String())
   711  		}
   712  	}
   713  	return r
   714  }
   715  
   716  func maxExtendedFunction() uint32 {
   717  	eax, _, _, _ := cpuid(0x80000000)
   718  	return eax
   719  }
   720  
   721  func maxFunctionID() uint32 {
   722  	a, _, _, _ := cpuid(0)
   723  	return a
   724  }
   725  
   726  func brandName() string {
   727  	if maxExtendedFunction() >= 0x80000004 {
   728  		v := make([]uint32, 0, 48)
   729  		for i := uint32(0); i < 3; i++ {
   730  			a, b, c, d := cpuid(0x80000002 + i)
   731  			v = append(v, a, b, c, d)
   732  		}
   733  		return strings.Trim(string(valAsString(v...)), " ")
   734  	}
   735  	return "unknown"
   736  }
   737  
   738  func threadsPerCore() int {
   739  	mfi := maxFunctionID()
   740  	vend, _ := vendorID()
   741  
   742  	if mfi < 0x4 || (vend != Intel && vend != AMD) {
   743  		return 1
   744  	}
   745  
   746  	if mfi < 0xb {
   747  		if vend != Intel {
   748  			return 1
   749  		}
   750  		_, b, _, d := cpuid(1)
   751  		if (d & (1 << 28)) != 0 {
   752  			// v will contain logical core count
   753  			v := (b >> 16) & 255
   754  			if v > 1 {
   755  				a4, _, _, _ := cpuid(4)
   756  				// physical cores
   757  				v2 := (a4 >> 26) + 1
   758  				if v2 > 0 {
   759  					return int(v) / int(v2)
   760  				}
   761  			}
   762  		}
   763  		return 1
   764  	}
   765  	_, b, _, _ := cpuidex(0xb, 0)
   766  	if b&0xffff == 0 {
   767  		if vend == AMD {
   768  			// Workaround for AMD returning 0, assume 2 if >= Zen 2
   769  			// It will be more correct than not.
   770  			fam, _, _ := familyModel()
   771  			_, _, _, d := cpuid(1)
   772  			if (d&(1<<28)) != 0 && fam >= 23 {
   773  				return 2
   774  			}
   775  		}
   776  		return 1
   777  	}
   778  	return int(b & 0xffff)
   779  }
   780  
   781  func logicalCores() int {
   782  	mfi := maxFunctionID()
   783  	v, _ := vendorID()
   784  	switch v {
   785  	case Intel:
   786  		// Use this on old Intel processors
   787  		if mfi < 0xb {
   788  			if mfi < 1 {
   789  				return 0
   790  			}
   791  			// CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
   792  			// that can be assigned to logical processors in a physical package.
   793  			// The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
   794  			_, ebx, _, _ := cpuid(1)
   795  			logical := (ebx >> 16) & 0xff
   796  			return int(logical)
   797  		}
   798  		_, b, _, _ := cpuidex(0xb, 1)
   799  		return int(b & 0xffff)
   800  	case AMD, Hygon:
   801  		_, b, _, _ := cpuid(1)
   802  		return int((b >> 16) & 0xff)
   803  	default:
   804  		return 0
   805  	}
   806  }
   807  
   808  func familyModel() (family, model, stepping int) {
   809  	if maxFunctionID() < 0x1 {
   810  		return 0, 0, 0
   811  	}
   812  	eax, _, _, _ := cpuid(1)
   813  	// If BaseFamily[3:0] is less than Fh then ExtendedFamily[7:0] is reserved and Family is equal to BaseFamily[3:0].
   814  	family = int((eax >> 8) & 0xf)
   815  	extFam := family == 0x6 // Intel is 0x6, needs extended model.
   816  	if family == 0xf {
   817  		// Add ExtFamily
   818  		family += int((eax >> 20) & 0xff)
   819  		extFam = true
   820  	}
   821  	// If BaseFamily[3:0] is less than 0Fh then ExtendedModel[3:0] is reserved and Model is equal to BaseModel[3:0].
   822  	model = int((eax >> 4) & 0xf)
   823  	if extFam {
   824  		// Add ExtModel
   825  		model += int((eax >> 12) & 0xf0)
   826  	}
   827  	stepping = int(eax & 0xf)
   828  	return family, model, stepping
   829  }
   830  
   831  func physicalCores() int {
   832  	v, _ := vendorID()
   833  	switch v {
   834  	case Intel:
   835  		return logicalCores() / threadsPerCore()
   836  	case AMD, Hygon:
   837  		lc := logicalCores()
   838  		tpc := threadsPerCore()
   839  		if lc > 0 && tpc > 0 {
   840  			return lc / tpc
   841  		}
   842  
   843  		// The following is inaccurate on AMD EPYC 7742 64-Core Processor
   844  		if maxExtendedFunction() >= 0x80000008 {
   845  			_, _, c, _ := cpuid(0x80000008)
   846  			if c&0xff > 0 {
   847  				return int(c&0xff) + 1
   848  			}
   849  		}
   850  	}
   851  	return 0
   852  }
   853  
   854  // Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
   855  var vendorMapping = map[string]Vendor{
   856  	"AMDisbetter!": AMD,
   857  	"AuthenticAMD": AMD,
   858  	"CentaurHauls": VIA,
   859  	"GenuineIntel": Intel,
   860  	"TransmetaCPU": Transmeta,
   861  	"GenuineTMx86": Transmeta,
   862  	"Geode by NSC": NSC,
   863  	"VIA VIA VIA ": VIA,
   864  	"KVMKVMKVMKVM": KVM,
   865  	"Microsoft Hv": MSVM,
   866  	"VMwareVMware": VMware,
   867  	"XenVMMXenVMM": XenHVM,
   868  	"bhyve bhyve ": Bhyve,
   869  	"HygonGenuine": Hygon,
   870  	"Vortex86 SoC": SiS,
   871  	"SiS SiS SiS ": SiS,
   872  	"RiseRiseRise": SiS,
   873  	"Genuine  RDC": RDC,
   874  }
   875  
   876  func vendorID() (Vendor, string) {
   877  	_, b, c, d := cpuid(0)
   878  	v := string(valAsString(b, d, c))
   879  	vend, ok := vendorMapping[v]
   880  	if !ok {
   881  		return VendorUnknown, v
   882  	}
   883  	return vend, v
   884  }
   885  
   886  func cacheLine() int {
   887  	if maxFunctionID() < 0x1 {
   888  		return 0
   889  	}
   890  
   891  	_, ebx, _, _ := cpuid(1)
   892  	cache := (ebx & 0xff00) >> 5 // cflush size
   893  	if cache == 0 && maxExtendedFunction() >= 0x80000006 {
   894  		_, _, ecx, _ := cpuid(0x80000006)
   895  		cache = ecx & 0xff // cacheline size
   896  	}
   897  	// TODO: Read from Cache and TLB Information
   898  	return int(cache)
   899  }
   900  
   901  func (c *CPUInfo) cacheSize() {
   902  	c.Cache.L1D = -1
   903  	c.Cache.L1I = -1
   904  	c.Cache.L2 = -1
   905  	c.Cache.L3 = -1
   906  	vendor, _ := vendorID()
   907  	switch vendor {
   908  	case Intel:
   909  		if maxFunctionID() < 4 {
   910  			return
   911  		}
   912  		c.Cache.L1I, c.Cache.L1D, c.Cache.L2, c.Cache.L3 = 0, 0, 0, 0
   913  		for i := uint32(0); ; i++ {
   914  			eax, ebx, ecx, _ := cpuidex(4, i)
   915  			cacheType := eax & 15
   916  			if cacheType == 0 {
   917  				break
   918  			}
   919  			cacheLevel := (eax >> 5) & 7
   920  			coherency := int(ebx&0xfff) + 1
   921  			partitions := int((ebx>>12)&0x3ff) + 1
   922  			associativity := int((ebx>>22)&0x3ff) + 1
   923  			sets := int(ecx) + 1
   924  			size := associativity * partitions * coherency * sets
   925  			switch cacheLevel {
   926  			case 1:
   927  				if cacheType == 1 {
   928  					// 1 = Data Cache
   929  					c.Cache.L1D = size
   930  				} else if cacheType == 2 {
   931  					// 2 = Instruction Cache
   932  					c.Cache.L1I = size
   933  				} else {
   934  					if c.Cache.L1D < 0 {
   935  						c.Cache.L1I = size
   936  					}
   937  					if c.Cache.L1I < 0 {
   938  						c.Cache.L1I = size
   939  					}
   940  				}
   941  			case 2:
   942  				c.Cache.L2 = size
   943  			case 3:
   944  				c.Cache.L3 = size
   945  			}
   946  		}
   947  	case AMD, Hygon:
   948  		// Untested.
   949  		if maxExtendedFunction() < 0x80000005 {
   950  			return
   951  		}
   952  		_, _, ecx, edx := cpuid(0x80000005)
   953  		c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024)
   954  		c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024)
   955  
   956  		if maxExtendedFunction() < 0x80000006 {
   957  			return
   958  		}
   959  		_, _, ecx, _ = cpuid(0x80000006)
   960  		c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
   961  
   962  		// CPUID Fn8000_001D_EAX_x[N:0] Cache Properties
   963  		if maxExtendedFunction() < 0x8000001D || !c.Has(TOPEXT) {
   964  			return
   965  		}
   966  
   967  		// Xen Hypervisor is buggy and returns the same entry no matter ECX value.
   968  		// Hack: When we encounter the same entry 100 times we break.
   969  		nSame := 0
   970  		var last uint32
   971  		for i := uint32(0); i < math.MaxUint32; i++ {
   972  			eax, ebx, ecx, _ := cpuidex(0x8000001D, i)
   973  
   974  			level := (eax >> 5) & 7
   975  			cacheNumSets := ecx + 1
   976  			cacheLineSize := 1 + (ebx & 2047)
   977  			cachePhysPartitions := 1 + ((ebx >> 12) & 511)
   978  			cacheNumWays := 1 + ((ebx >> 22) & 511)
   979  
   980  			typ := eax & 15
   981  			size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays)
   982  			if typ == 0 {
   983  				return
   984  			}
   985  
   986  			// Check for the same value repeated.
   987  			comb := eax ^ ebx ^ ecx
   988  			if comb == last {
   989  				nSame++
   990  				if nSame == 100 {
   991  					return
   992  				}
   993  			}
   994  			last = comb
   995  
   996  			switch level {
   997  			case 1:
   998  				switch typ {
   999  				case 1:
  1000  					// Data cache
  1001  					c.Cache.L1D = size
  1002  				case 2:
  1003  					// Inst cache
  1004  					c.Cache.L1I = size
  1005  				default:
  1006  					if c.Cache.L1D < 0 {
  1007  						c.Cache.L1I = size
  1008  					}
  1009  					if c.Cache.L1I < 0 {
  1010  						c.Cache.L1I = size
  1011  					}
  1012  				}
  1013  			case 2:
  1014  				c.Cache.L2 = size
  1015  			case 3:
  1016  				c.Cache.L3 = size
  1017  			}
  1018  		}
  1019  	}
  1020  }
  1021  
// SGXEPCSection describes one EPC section as enumerated by hasSGX from the
// sub-leaves of CPUID leaf 0x12.
type SGXEPCSection struct {
	// BaseAddress is assembled from the sub-leaf's EAX (low part) and EBX
	// (high part) registers.
	BaseAddress uint64
	// EPCSize is assembled from the sub-leaf's ECX (low part) and EDX (high
	// part) registers.
	EPCSize     uint64
}
  1026  
// SGXSupport summarizes the SGX capabilities collected by hasSGX. When
// Available is false, hasSGX leaves every other field at its zero value.
type SGXSupport struct {
	// Available and LaunchControl are copied from the arguments of hasSGX.
	Available           bool
	LaunchControl       bool
	// SGX1Supported and SGX2Supported are bits 0 and 1 of CPUID.0x12.0:EAX.
	SGX1Supported       bool
	SGX2Supported       bool
	// MaxEnclaveSizeNot64 and MaxEnclaveSize64 are powers of two whose
	// exponents come from bits 7:0 and 15:8 of CPUID.0x12.0:EDX.
	MaxEnclaveSizeNot64 int64
	MaxEnclaveSize64    int64
	// EPCSections lists the EPC sections found in sub-leaves 2 and up.
	EPCSections         []SGXEPCSection
}
  1036  
  1037  func hasSGX(available, lc bool) (rval SGXSupport) {
  1038  	rval.Available = available
  1039  
  1040  	if !available {
  1041  		return
  1042  	}
  1043  
  1044  	rval.LaunchControl = lc
  1045  
  1046  	a, _, _, d := cpuidex(0x12, 0)
  1047  	rval.SGX1Supported = a&0x01 != 0
  1048  	rval.SGX2Supported = a&0x02 != 0
  1049  	rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF)     // pow 2
  1050  	rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2
  1051  	rval.EPCSections = make([]SGXEPCSection, 0)
  1052  
  1053  	for subleaf := uint32(2); subleaf < 2+8; subleaf++ {
  1054  		eax, ebx, ecx, edx := cpuidex(0x12, subleaf)
  1055  		leafType := eax & 0xf
  1056  
  1057  		if leafType == 0 {
  1058  			// Invalid subleaf, stop iterating
  1059  			break
  1060  		} else if leafType == 1 {
  1061  			// EPC Section subleaf
  1062  			baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32)
  1063  			size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32)
  1064  
  1065  			section := SGXEPCSection{BaseAddress: baseAddress, EPCSize: size}
  1066  			rval.EPCSections = append(rval.EPCSections, section)
  1067  		}
  1068  	}
  1069  
  1070  	return
  1071  }
  1072  
  1073  func support() flagSet {
  1074  	var fs flagSet
  1075  	mfi := maxFunctionID()
  1076  	vend, _ := vendorID()
  1077  	if mfi < 0x1 {
  1078  		return fs
  1079  	}
  1080  	family, model, _ := familyModel()
  1081  
  1082  	_, _, c, d := cpuid(1)
  1083  	fs.setIf((d&(1<<0)) != 0, X87)
  1084  	fs.setIf((d&(1<<8)) != 0, CMPXCHG8)
  1085  	fs.setIf((d&(1<<11)) != 0, SYSEE)
  1086  	fs.setIf((d&(1<<15)) != 0, CMOV)
  1087  	fs.setIf((d&(1<<23)) != 0, MMX)
  1088  	fs.setIf((d&(1<<24)) != 0, FXSR)
  1089  	fs.setIf((d&(1<<25)) != 0, FXSROPT)
  1090  	fs.setIf((d&(1<<25)) != 0, SSE)
  1091  	fs.setIf((d&(1<<26)) != 0, SSE2)
  1092  	fs.setIf((c&1) != 0, SSE3)
  1093  	fs.setIf((c&(1<<5)) != 0, VMX)
  1094  	fs.setIf((c&(1<<9)) != 0, SSSE3)
  1095  	fs.setIf((c&(1<<19)) != 0, SSE4)
  1096  	fs.setIf((c&(1<<20)) != 0, SSE42)
  1097  	fs.setIf((c&(1<<25)) != 0, AESNI)
  1098  	fs.setIf((c&(1<<1)) != 0, CLMUL)
  1099  	fs.setIf(c&(1<<22) != 0, MOVBE)
  1100  	fs.setIf(c&(1<<23) != 0, POPCNT)
  1101  	fs.setIf(c&(1<<30) != 0, RDRAND)
  1102  
  1103  	// This bit has been reserved by Intel & AMD for use by hypervisors,
  1104  	// and indicates the presence of a hypervisor.
  1105  	fs.setIf(c&(1<<31) != 0, HYPERVISOR)
  1106  	fs.setIf(c&(1<<29) != 0, F16C)
  1107  	fs.setIf(c&(1<<13) != 0, CX16)
  1108  
  1109  	if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 {
  1110  		fs.setIf(threadsPerCore() > 1, HTT)
  1111  	}
  1112  	if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 {
  1113  		fs.setIf(threadsPerCore() > 1, HTT)
  1114  	}
  1115  	fs.setIf(c&1<<26 != 0, XSAVE)
  1116  	fs.setIf(c&1<<27 != 0, OSXSAVE)
  1117  	// Check XGETBV/XSAVE (26), OXSAVE (27) and AVX (28) bits
  1118  	const avxCheck = 1<<26 | 1<<27 | 1<<28
  1119  	if c&avxCheck == avxCheck {
  1120  		// Check for OS support
  1121  		eax, _ := xgetbv(0)
  1122  		if (eax & 0x6) == 0x6 {
  1123  			fs.set(AVX)
  1124  			switch vend {
  1125  			case Intel:
  1126  				// Older than Haswell.
  1127  				fs.setIf(family == 6 && model < 60, AVXSLOW)
  1128  			case AMD:
  1129  				// Older than Zen 2
  1130  				fs.setIf(family < 23 || (family == 23 && model < 49), AVXSLOW)
  1131  			}
  1132  		}
  1133  	}
  1134  	// FMA3 can be used with SSE registers, so no OS support is strictly needed.
  1135  	// fma3 and OSXSAVE needed.
  1136  	const fma3Check = 1<<12 | 1<<27
  1137  	fs.setIf(c&fma3Check == fma3Check, FMA3)
  1138  
  1139  	// Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
  1140  	if mfi >= 7 {
  1141  		_, ebx, ecx, edx := cpuidex(7, 0)
  1142  		if fs.inSet(AVX) && (ebx&0x00000020) != 0 {
  1143  			fs.set(AVX2)
  1144  		}
  1145  		// CPUID.(EAX=7, ECX=0).EBX
  1146  		if (ebx & 0x00000008) != 0 {
  1147  			fs.set(BMI1)
  1148  			fs.setIf((ebx&0x00000100) != 0, BMI2)
  1149  		}
  1150  		fs.setIf(ebx&(1<<2) != 0, SGX)
  1151  		fs.setIf(ebx&(1<<4) != 0, HLE)
  1152  		fs.setIf(ebx&(1<<9) != 0, ERMS)
  1153  		fs.setIf(ebx&(1<<11) != 0, RTM)
  1154  		fs.setIf(ebx&(1<<14) != 0, MPX)
  1155  		fs.setIf(ebx&(1<<18) != 0, RDSEED)
  1156  		fs.setIf(ebx&(1<<19) != 0, ADX)
  1157  		fs.setIf(ebx&(1<<29) != 0, SHA)
  1158  
  1159  		// CPUID.(EAX=7, ECX=0).ECX
  1160  		fs.setIf(ecx&(1<<5) != 0, WAITPKG)
  1161  		fs.setIf(ecx&(1<<7) != 0, CETSS)
  1162  		fs.setIf(ecx&(1<<8) != 0, GFNI)
  1163  		fs.setIf(ecx&(1<<9) != 0, VAES)
  1164  		fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ)
  1165  		fs.setIf(ecx&(1<<13) != 0, TME)
  1166  		fs.setIf(ecx&(1<<25) != 0, CLDEMOTE)
  1167  		fs.setIf(ecx&(1<<27) != 0, MOVDIRI)
  1168  		fs.setIf(ecx&(1<<28) != 0, MOVDIR64B)
  1169  		fs.setIf(ecx&(1<<29) != 0, ENQCMD)
  1170  		fs.setIf(ecx&(1<<30) != 0, SGXLC)
  1171  
  1172  		// CPUID.(EAX=7, ECX=0).EDX
  1173  		fs.setIf(edx&(1<<4) != 0, FSRM)
  1174  		fs.setIf(edx&(1<<9) != 0, SRBDS_CTRL)
  1175  		fs.setIf(edx&(1<<10) != 0, MD_CLEAR)
  1176  		fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT)
  1177  		fs.setIf(edx&(1<<14) != 0, SERIALIZE)
  1178  		fs.setIf(edx&(1<<15) != 0, HYBRID_CPU)
  1179  		fs.setIf(edx&(1<<16) != 0, TSXLDTRK)
  1180  		fs.setIf(edx&(1<<18) != 0, PCONFIG)
  1181  		fs.setIf(edx&(1<<20) != 0, CETIBT)
  1182  		fs.setIf(edx&(1<<26) != 0, IBPB)
  1183  		fs.setIf(edx&(1<<27) != 0, STIBP)
  1184  		fs.setIf(edx&(1<<28) != 0, FLUSH_L1D)
  1185  		fs.setIf(edx&(1<<29) != 0, IA32_ARCH_CAP)
  1186  		fs.setIf(edx&(1<<30) != 0, IA32_CORE_CAP)
  1187  		fs.setIf(edx&(1<<31) != 0, SPEC_CTRL_SSBD)
  1188  
  1189  		// CPUID.(EAX=7, ECX=1).EDX
  1190  		fs.setIf(edx&(1<<4) != 0, AVXVNNIINT8)
  1191  		fs.setIf(edx&(1<<5) != 0, AVXNECONVERT)
  1192  		fs.setIf(edx&(1<<14) != 0, PREFETCHI)
  1193  
  1194  		// CPUID.(EAX=7, ECX=1).EAX
  1195  		eax1, _, _, _ := cpuidex(7, 1)
  1196  		fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI)
  1197  		fs.setIf(eax1&(1<<7) != 0, CMPCCXADD)
  1198  		fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL)
  1199  		fs.setIf(eax1&(1<<11) != 0, STOSB_SHORT)
  1200  		fs.setIf(eax1&(1<<12) != 0, CMPSB_SCADBS_SHORT)
  1201  		fs.setIf(eax1&(1<<22) != 0, HRESET)
  1202  		fs.setIf(eax1&(1<<23) != 0, AVXIFMA)
  1203  		fs.setIf(eax1&(1<<26) != 0, LAM)
  1204  
  1205  		// Only detect AVX-512 features if XGETBV is supported
  1206  		if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
  1207  			// Check for OS support
  1208  			eax, _ := xgetbv(0)
  1209  
  1210  			// Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
  1211  			// ZMM16-ZMM31 state are enabled by OS)
  1212  			/// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS).
  1213  			hasAVX512 := (eax>>5)&7 == 7 && (eax>>1)&3 == 3
  1214  			if runtime.GOOS == "darwin" {
  1215  				hasAVX512 = fs.inSet(AVX) && darwinHasAVX512()
  1216  			}
  1217  			if hasAVX512 {
  1218  				fs.setIf(ebx&(1<<16) != 0, AVX512F)
  1219  				fs.setIf(ebx&(1<<17) != 0, AVX512DQ)
  1220  				fs.setIf(ebx&(1<<21) != 0, AVX512IFMA)
  1221  				fs.setIf(ebx&(1<<26) != 0, AVX512PF)
  1222  				fs.setIf(ebx&(1<<27) != 0, AVX512ER)
  1223  				fs.setIf(ebx&(1<<28) != 0, AVX512CD)
  1224  				fs.setIf(ebx&(1<<30) != 0, AVX512BW)
  1225  				fs.setIf(ebx&(1<<31) != 0, AVX512VL)
  1226  				// ecx
  1227  				fs.setIf(ecx&(1<<1) != 0, AVX512VBMI)
  1228  				fs.setIf(ecx&(1<<6) != 0, AVX512VBMI2)
  1229  				fs.setIf(ecx&(1<<11) != 0, AVX512VNNI)
  1230  				fs.setIf(ecx&(1<<12) != 0, AVX512BITALG)
  1231  				fs.setIf(ecx&(1<<14) != 0, AVX512VPOPCNTDQ)
  1232  				// edx
  1233  				fs.setIf(edx&(1<<8) != 0, AVX512VP2INTERSECT)
  1234  				fs.setIf(edx&(1<<22) != 0, AMXBF16)
  1235  				fs.setIf(edx&(1<<23) != 0, AVX512FP16)
  1236  				fs.setIf(edx&(1<<24) != 0, AMXTILE)
  1237  				fs.setIf(edx&(1<<25) != 0, AMXINT8)
  1238  				// eax1 = CPUID.(EAX=7, ECX=1).EAX
  1239  				fs.setIf(eax1&(1<<5) != 0, AVX512BF16)
  1240  				fs.setIf(eax1&(1<<19) != 0, WRMSRNS)
  1241  				fs.setIf(eax1&(1<<21) != 0, AMXFP16)
  1242  				fs.setIf(eax1&(1<<27) != 0, MSRLIST)
  1243  			}
  1244  		}
  1245  
  1246  		// CPUID.(EAX=7, ECX=2)
  1247  		_, _, _, edx = cpuidex(7, 2)
  1248  		fs.setIf(edx&(1<<0) != 0, PSFD)
  1249  		fs.setIf(edx&(1<<1) != 0, IDPRED_CTRL)
  1250  		fs.setIf(edx&(1<<2) != 0, RRSBA_CTRL)
  1251  		fs.setIf(edx&(1<<4) != 0, BHI_CTRL)
  1252  		fs.setIf(edx&(1<<5) != 0, MCDT_NO)
  1253  
  1254  	}
  1255  
  1256  	// Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1)
  1257  	// EAX
  1258  	// Bit 00: XSAVEOPT is available.
  1259  	// Bit 01: Supports XSAVEC and the compacted form of XRSTOR if set.
  1260  	// Bit 02: Supports XGETBV with ECX = 1 if set.
  1261  	// Bit 03: Supports XSAVES/XRSTORS and IA32_XSS if set.
  1262  	// Bits 31 - 04: Reserved.
  1263  	// EBX
  1264  	// Bits 31 - 00: The size in bytes of the XSAVE area containing all states enabled by XCRO | IA32_XSS.
  1265  	// ECX
  1266  	// Bits 31 - 00: Reports the supported bits of the lower 32 bits of the IA32_XSS MSR. IA32_XSS[n] can be set to 1 only if ECX[n] is 1.
  1267  	// EDX?
  1268  	// Bits 07 - 00: Used for XCR0. Bit 08: PT state. Bit 09: Used for XCR0. Bits 12 - 10: Reserved. Bit 13: HWP state. Bits 31 - 14: Reserved.
  1269  	if mfi >= 0xd {
  1270  		if fs.inSet(XSAVE) {
  1271  			eax, _, _, _ := cpuidex(0xd, 1)
  1272  			fs.setIf(eax&(1<<0) != 0, XSAVEOPT)
  1273  			fs.setIf(eax&(1<<1) != 0, XSAVEC)
  1274  			fs.setIf(eax&(1<<2) != 0, XGETBV1)
  1275  			fs.setIf(eax&(1<<3) != 0, XSAVES)
  1276  		}
  1277  	}
  1278  	if maxExtendedFunction() >= 0x80000001 {
  1279  		_, _, c, d := cpuid(0x80000001)
  1280  		if (c & (1 << 5)) != 0 {
  1281  			fs.set(LZCNT)
  1282  			fs.set(POPCNT)
  1283  		}
  1284  		// ECX
  1285  		fs.setIf((c&(1<<0)) != 0, LAHF)
  1286  		fs.setIf((c&(1<<2)) != 0, SVM)
  1287  		fs.setIf((c&(1<<6)) != 0, SSE4A)
  1288  		fs.setIf((c&(1<<10)) != 0, IBS)
  1289  		fs.setIf((c&(1<<22)) != 0, TOPEXT)
  1290  
  1291  		// EDX
  1292  		fs.setIf(d&(1<<11) != 0, SYSCALL)
  1293  		fs.setIf(d&(1<<20) != 0, NX)
  1294  		fs.setIf(d&(1<<22) != 0, MMXEXT)
  1295  		fs.setIf(d&(1<<23) != 0, MMX)
  1296  		fs.setIf(d&(1<<24) != 0, FXSR)
  1297  		fs.setIf(d&(1<<25) != 0, FXSROPT)
  1298  		fs.setIf(d&(1<<27) != 0, RDTSCP)
  1299  		fs.setIf(d&(1<<30) != 0, AMD3DNOWEXT)
  1300  		fs.setIf(d&(1<<31) != 0, AMD3DNOW)
  1301  
  1302  		/* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
  1303  		 * used unless the OS has AVX support. */
  1304  		if fs.inSet(AVX) {
  1305  			fs.setIf((c&(1<<11)) != 0, XOP)
  1306  			fs.setIf((c&(1<<16)) != 0, FMA4)
  1307  		}
  1308  
  1309  	}
  1310  	if maxExtendedFunction() >= 0x80000007 {
  1311  		_, b, _, d := cpuid(0x80000007)
  1312  		fs.setIf((b&(1<<0)) != 0, MCAOVERFLOW)
  1313  		fs.setIf((b&(1<<1)) != 0, SUCCOR)
  1314  		fs.setIf((b&(1<<2)) != 0, HWA)
  1315  		fs.setIf((d&(1<<9)) != 0, CPBOOST)
  1316  	}
  1317  
  1318  	if maxExtendedFunction() >= 0x80000008 {
  1319  		_, b, _, _ := cpuid(0x80000008)
  1320  		fs.setIf(b&(1<<28) != 0, PSFD)
  1321  		fs.setIf(b&(1<<27) != 0, CPPC)
  1322  		fs.setIf(b&(1<<24) != 0, SPEC_CTRL_SSBD)
  1323  		fs.setIf(b&(1<<23) != 0, PPIN)
  1324  		fs.setIf(b&(1<<21) != 0, TLB_FLUSH_NESTED)
  1325  		fs.setIf(b&(1<<20) != 0, EFER_LMSLE_UNS)
  1326  		fs.setIf(b&(1<<19) != 0, IBRS_PROVIDES_SMP)
  1327  		fs.setIf(b&(1<<18) != 0, IBRS_PREFERRED)
  1328  		fs.setIf(b&(1<<17) != 0, STIBP_ALWAYSON)
  1329  		fs.setIf(b&(1<<15) != 0, STIBP)
  1330  		fs.setIf(b&(1<<14) != 0, IBRS)
  1331  		fs.setIf((b&(1<<13)) != 0, INT_WBINVD)
  1332  		fs.setIf(b&(1<<12) != 0, IBPB)
  1333  		fs.setIf((b&(1<<9)) != 0, WBNOINVD)
  1334  		fs.setIf((b&(1<<8)) != 0, MCOMMIT)
  1335  		fs.setIf((b&(1<<4)) != 0, RDPRU)
  1336  		fs.setIf((b&(1<<3)) != 0, INVLPGB)
  1337  		fs.setIf((b&(1<<1)) != 0, MSRIRC)
  1338  		fs.setIf((b&(1<<0)) != 0, CLZERO)
  1339  	}
  1340  
  1341  	if fs.inSet(SVM) && maxExtendedFunction() >= 0x8000000A {
  1342  		_, _, _, edx := cpuid(0x8000000A)
  1343  		fs.setIf((edx>>0)&1 == 1, SVMNP)
  1344  		fs.setIf((edx>>1)&1 == 1, LBRVIRT)
  1345  		fs.setIf((edx>>2)&1 == 1, SVML)
  1346  		fs.setIf((edx>>3)&1 == 1, NRIPS)
  1347  		fs.setIf((edx>>4)&1 == 1, TSCRATEMSR)
  1348  		fs.setIf((edx>>5)&1 == 1, VMCBCLEAN)
  1349  		fs.setIf((edx>>6)&1 == 1, SVMFBASID)
  1350  		fs.setIf((edx>>7)&1 == 1, SVMDA)
  1351  		fs.setIf((edx>>10)&1 == 1, SVMPF)
  1352  		fs.setIf((edx>>12)&1 == 1, SVMPFT)
  1353  	}
  1354  
  1355  	if maxExtendedFunction() >= 0x8000001a {
  1356  		eax, _, _, _ := cpuid(0x8000001a)
  1357  		fs.setIf((eax>>0)&1 == 1, FP128)
  1358  		fs.setIf((eax>>1)&1 == 1, MOVU)
  1359  		fs.setIf((eax>>2)&1 == 1, FP256)
  1360  	}
  1361  
  1362  	if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) {
  1363  		eax, _, _, _ := cpuid(0x8000001b)
  1364  		fs.setIf((eax>>0)&1 == 1, IBSFFV)
  1365  		fs.setIf((eax>>1)&1 == 1, IBSFETCHSAM)
  1366  		fs.setIf((eax>>2)&1 == 1, IBSOPSAM)
  1367  		fs.setIf((eax>>3)&1 == 1, IBSRDWROPCNT)
  1368  		fs.setIf((eax>>4)&1 == 1, IBSOPCNT)
  1369  		fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT)
  1370  		fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT)
  1371  		fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK)
  1372  		fs.setIf((eax>>8)&1 == 1, IBS_OPFUSE)
  1373  		fs.setIf((eax>>9)&1 == 1, IBS_FETCH_CTLX)
  1374  		fs.setIf((eax>>10)&1 == 1, IBS_OPDATA4) // Doc says "Fixed,0. IBS op data 4 MSR supported", but assuming they mean 1.
  1375  		fs.setIf((eax>>11)&1 == 1, IBS_ZEN4)
  1376  	}
  1377  
  1378  	if maxExtendedFunction() >= 0x8000001f && vend == AMD {
  1379  		a, _, _, _ := cpuid(0x8000001f)
  1380  		fs.setIf((a>>0)&1 == 1, SME)
  1381  		fs.setIf((a>>1)&1 == 1, SEV)
  1382  		fs.setIf((a>>2)&1 == 1, MSR_PAGEFLUSH)
  1383  		fs.setIf((a>>3)&1 == 1, SEV_ES)
  1384  		fs.setIf((a>>4)&1 == 1, SEV_SNP)
  1385  		fs.setIf((a>>5)&1 == 1, VMPL)
  1386  		fs.setIf((a>>10)&1 == 1, SME_COHERENT)
  1387  		fs.setIf((a>>11)&1 == 1, SEV_64BIT)
  1388  		fs.setIf((a>>12)&1 == 1, SEV_RESTRICTED)
  1389  		fs.setIf((a>>13)&1 == 1, SEV_ALTERNATIVE)
  1390  		fs.setIf((a>>14)&1 == 1, SEV_DEBUGSWAP)
  1391  		fs.setIf((a>>15)&1 == 1, IBS_PREVENTHOST)
  1392  		fs.setIf((a>>16)&1 == 1, VTE)
  1393  		fs.setIf((a>>24)&1 == 1, VMSA_REGPROT)
  1394  	}
  1395  
  1396  	return fs
  1397  }
  1398  
  1399  func valAsString(values ...uint32) []byte {
  1400  	r := make([]byte, 4*len(values))
  1401  	for i, v := range values {
  1402  		dst := r[i*4:]
  1403  		dst[0] = byte(v & 0xff)
  1404  		dst[1] = byte((v >> 8) & 0xff)
  1405  		dst[2] = byte((v >> 16) & 0xff)
  1406  		dst[3] = byte((v >> 24) & 0xff)
  1407  		switch {
  1408  		case dst[0] == 0:
  1409  			return r[:i*4]
  1410  		case dst[1] == 0:
  1411  			return r[:i*4+1]
  1412  		case dst[2] == 0:
  1413  			return r[:i*4+2]
  1414  		case dst[3] == 0:
  1415  			return r[:i*4+3]
  1416  		}
  1417  	}
  1418  	return r
  1419  }