catinello.eu/x/cpuid@v0.0.0-20231214173555-81a76c018636/cpuid.go

catinello.eu/x/cpuid@v0.0.0-20231214173555-81a76c018636/cpuid.go (about)

     1  // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
     2  
     3  // Package cpuid provides information about the CPU running the current program.
     4  //
     5  // CPU features are detected on startup, and kept for fast access through the life of the application.
     6  // Currently x86 / x64 (AMD64) as well as arm64 is supported.
     7  //
     8  // You can access the CPU information by accessing the shared CPU variable of the cpuid library.
     9  //
    10  // Package home: https://catinello.eu/x/cpuid
    11  package cpuid
    12  
    13  import (
    14  	"flag"
    15  	"fmt"
    16  	"math"
    17  	"math/bits"
    18  	"os"
    19  	"runtime"
    20  	"strings"
    21  )
    22  
    23  // AMD reference: https://www.amd.com/system/files/TechDocs/25481.pdf
    24  // and Processor Programming Reference (PPR)
    25  
    26  // Vendor identifies a CPU vendor; the zero value is VendorUnknown.
    27  type Vendor int
    28  
    29  const (
    30  	VendorUnknown Vendor = iota
    31  	Intel
    32  	AMD
    33  	VIA
    34  	Transmeta
    35  	NSC
    36  	KVM  // Kernel-based Virtual Machine
    37  	MSVM // Microsoft Hyper-V or Windows Virtual PC
    38  	VMware
    39  	XenHVM
    40  	Bhyve
    41  	Hygon
    42  	SiS
    43  	RDC
    44  
    45  	Ampere
    46  	ARM
    47  	Broadcom
    48  	Cavium
    49  	DEC
    50  	Fujitsu
    51  	Infineon
    52  	Motorola
    53  	NVIDIA
    54  	AMCC
    55  	Qualcomm
    56  	Marvell
    57  
    58  	lastVendor // keep last: its value is one past the final named vendor
    59  )
    60  
    61  //go:generate stringer -type=FeatureID,Vendor
    62  
    63  // FeatureID is the ID of a specific cpu feature.
    64  type FeatureID int
    65  
    66  const (
    67  	// Keep index -1 as unknown
    68  	UNKNOWN = -1
    69  
    70  	// Add features. iota is already 1 here (UNKNOWN is the first spec), so ADX == 1.
    71  	ADX                FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
    72  	AESNI                               // Advanced Encryption Standard New Instructions
    73  	AMD3DNOW                            // AMD 3DNOW
    74  	AMD3DNOWEXT                         // AMD 3DNowExt
    75  	AMXBF16                             // Tile computational operations on BFLOAT16 numbers
    76  	AMXINT8                             // Tile computational operations on 8-bit integers
    77  	AMXTILE                             // Tile architecture
    78  	AVX                                 // AVX functions
    79  	AVX2                                // AVX2 functions
    80  	AVX512BF16                          // AVX-512 BFLOAT16 Instructions
    81  	AVX512BITALG                        // AVX-512 Bit Algorithms
    82  	AVX512BW                            // AVX-512 Byte and Word Instructions
    83  	AVX512CD                            // AVX-512 Conflict Detection Instructions
    84  	AVX512DQ                            // AVX-512 Doubleword and Quadword Instructions
    85  	AVX512ER                            // AVX-512 Exponential and Reciprocal Instructions
    86  	AVX512F                             // AVX-512 Foundation
    87  	AVX512FP16                          // AVX-512 FP16 Instructions
    88  	AVX512IFMA                          // AVX-512 Integer Fused Multiply-Add Instructions
    89  	AVX512PF                            // AVX-512 Prefetch Instructions
    90  	AVX512VBMI                          // AVX-512 Vector Bit Manipulation Instructions
    91  	AVX512VBMI2                         // AVX-512 Vector Bit Manipulation Instructions, Version 2
    92  	AVX512VL                            // AVX-512 Vector Length Extensions
    93  	AVX512VNNI                          // AVX-512 Vector Neural Network Instructions
    94  	AVX512VP2INTERSECT                  // AVX-512 Intersect for D/Q
    95  	AVX512VPOPCNTDQ                     // AVX-512 Vector Population Count Doubleword and Quadword
    96  	AVXSLOW                             // Indicates the CPU performs 2 128 bit operations instead of one
    97  	AVXVNNI                             // AVX (VEX encoded) VNNI neural network instructions
    98  	BMI1                                // Bit Manipulation Instruction Set 1
    99  	BMI2                                // Bit Manipulation Instruction Set 2
   100  	CETIBT                              // Intel CET Indirect Branch Tracking
   101  	CETSS                               // Intel CET Shadow Stack
   102  	CLDEMOTE                            // Cache Line Demote
   103  	CLMUL                               // Carry-less Multiplication
   104  	CLZERO                              // CLZERO instruction supported
   105  	CMOV                                // i686 CMOV
   106  	CMPSB_SCADBS_SHORT                  // Fast short CMPSB and SCASB
   107  	CMPXCHG8                            // CMPXCHG8 instruction
   108  	CPBOOST                             // Core Performance Boost
   109  	CX16                                // CMPXCHG16B Instruction
   110  	ENQCMD                              // Enqueue Command
   111  	ERMS                                // Enhanced REP MOVSB/STOSB
   112  	F16C                                // Half-precision floating-point conversion
   113  	FMA3                                // Intel FMA 3. Does not imply AVX.
   114  	FMA4                                // Bulldozer FMA4 functions
   115  	FXSR                                // FXSAVE, FXRSTOR instructions, CR4 bit 9
   116  	FXSROPT                             // FXSAVE/FXRSTOR optimizations
   117  	GFNI                                // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.
   118  	HLE                                 // Hardware Lock Elision
   119  	HRESET                              // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR
   120  	HTT                                 // Hyperthreading (enabled)
   121  	HWA                                 // Hardware assert supported. Indicates support for MSRC001_10
   122  	HYPERVISOR                          // This bit has been reserved by Intel & AMD for use by hypervisors
   123  	IBPB                                // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
   124  	IBS                                 // Instruction Based Sampling (AMD)
   125  	IBSBRNTRGT                          // Instruction Based Sampling Feature (AMD)
   126  	IBSFETCHSAM                         // Instruction Based Sampling Feature (AMD)
   127  	IBSFFV                              // Instruction Based Sampling Feature (AMD)
   128  	IBSOPCNT                            // Instruction Based Sampling Feature (AMD)
   129  	IBSOPCNTEXT                         // Instruction Based Sampling Feature (AMD)
   130  	IBSOPSAM                            // Instruction Based Sampling Feature (AMD)
   131  	IBSRDWROPCNT                        // Instruction Based Sampling Feature (AMD)
   132  	IBSRIPINVALIDCHK                    // Instruction Based Sampling Feature (AMD)
   133  	IBS_PREVENTHOST                     // Disallowing IBS use by the host supported
   134  	INT_WBINVD                          // WBINVD/WBNOINVD are interruptible.
   135  	INVLPGB                             // INVLPGB and TLBSYNC instructions supported
   136  	LAHF                                // LAHF/SAHF in long mode
   137  	LAM                                 // If set, CPU supports Linear Address Masking
   138  	LBRVIRT                             // LBR virtualization
   139  	LZCNT                               // LZCNT instruction
   140  	MCAOVERFLOW                         // MCA overflow recovery support.
   141  	MCOMMIT                             // MCOMMIT instruction supported
   142  	MMX                                 // standard MMX
   143  	MMXEXT                              // SSE integer functions or AMD MMX ext
   144  	MOVBE                               // MOVBE instruction (big-endian)
   145  	MOVDIR64B                           // Move 64 Bytes as Direct Store
   146  	MOVDIRI                             // Move Doubleword as Direct Store
   147  	MOVSB_ZL                            // Fast Zero-Length MOVSB
   148  	MPX                                 // Intel MPX (Memory Protection Extensions)
   149  	MSRIRC                              // Instruction Retired Counter MSR available
   150  	MSR_PAGEFLUSH                       // Page Flush MSR available
   151  	NRIPS                               // Indicates support for NRIP save on VMEXIT
   152  	NX                                  // NX (No-Execute) bit
   153  	OSXSAVE                             // XSAVE enabled by OS
   154  	PCONFIG                             // PCONFIG for Intel Multi-Key Total Memory Encryption
   155  	POPCNT                              // POPCNT instruction
   156  	RDPRU                               // RDPRU instruction supported
   157  	RDRAND                              // RDRAND instruction is available
   158  	RDSEED                              // RDSEED instruction is available
   159  	RDTSCP                              // RDTSCP Instruction
   160  	RTM                                 // Restricted Transactional Memory
   161  	RTM_ALWAYS_ABORT                    // Indicates that the loaded microcode is forcing RTM abort.
   162  	SERIALIZE                           // Serialize Instruction Execution
   163  	SEV                                 // AMD Secure Encrypted Virtualization supported
   164  	SEV_64BIT                           // AMD SEV guest execution only allowed from a 64-bit host
   165  	SEV_ALTERNATIVE                     // AMD SEV Alternate Injection supported
   166  	SEV_DEBUGSWAP                       // Full debug state swap supported for SEV-ES guests
   167  	SEV_ES                              // AMD SEV Encrypted State supported
   168  	SEV_RESTRICTED                      // AMD SEV Restricted Injection supported
   169  	SEV_SNP                             // AMD SEV Secure Nested Paging supported
   170  	SGX                                 // Software Guard Extensions
   171  	SGXLC                               // Software Guard Extensions Launch Control
   172  	SHA                                 // Intel SHA Extensions
   173  	SME                                 // AMD Secure Memory Encryption supported
   174  	SME_COHERENT                        // AMD Hardware cache coherency across encryption domains enforced
   175  	SSE                                 // SSE functions
   176  	SSE2                                // P4 SSE functions
   177  	SSE3                                // Prescott SSE3 functions
   178  	SSE4                                // Penryn SSE4.1 functions
   179  	SSE42                               // Nehalem SSE4.2 functions
   180  	SSE4A                               // AMD Barcelona microarchitecture SSE4a instructions
   181  	SSSE3                               // Conroe SSSE3 functions
   182  	STIBP                               // Single Thread Indirect Branch Predictors
   183  	STOSB_SHORT                         // Fast short STOSB
   184  	SUCCOR                              // Software uncorrectable error containment and recovery capability.
   185  	SVM                                 // AMD Secure Virtual Machine
   186  	SVMDA                               // Indicates support for the SVM decode assists.
   187  	SVMFBASID                           // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control
   188  	SVML                                // AMD SVM lock. Indicates support for SVM-Lock.
   189  	SVMNP                               // AMD SVM nested paging
   190  	SVMPF                               // SVM pause intercept filter. Indicates support for the pause intercept filter
   191  	SVMPFT                              // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold
   192  	SYSCALL                             // System-Call Extension (SCE): SYSCALL and SYSRET instructions.
   193  	SYSEE                               // SYSENTER and SYSEXIT instructions
   194  	TBM                                 // AMD Trailing Bit Manipulation
   195  	TME                                 // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.
   196  	TSCRATEMSR                          // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104
   197  	TSXLDTRK                            // Intel TSX Suspend Load Address Tracking
   198  	VAES                                // Vector AES. AVX(512) versions requires additional checks.
   199  	VMCBCLEAN                           // VMCB clean bits. Indicates support for VMCB clean bits.
   200  	VMPL                                // AMD VM Permission Levels supported
   201  	VMSA_REGPROT                        // AMD VMSA Register Protection supported
   202  	VMX                                 // Virtual Machine Extensions
   203  	VPCLMULQDQ                          // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions.
   204  	VTE                                 // AMD Virtual Transparent Encryption supported
   205  	WAITPKG                             // TPAUSE, UMONITOR, UMWAIT
   206  	WBNOINVD                            // Write Back and Do Not Invalidate Cache
   207  	X87                                 // FPU
   208  	XGETBV1                             // Supports XGETBV with ECX = 1
   209  	XOP                                 // Bulldozer XOP functions
   210  	XSAVE                               // XSAVE, XRSTOR, XSETBV, XGETBV
   211  	XSAVEC                              // Supports XSAVEC and the compacted form of XRSTOR.
   212  	XSAVEOPT                            // XSAVEOPT available
   213  	XSAVES                              // Supports XSAVES/XRSTORS and IA32_XSS
   214  
   215  	// ARM features:
   216  	AESARM   // AES instructions
   217  	ARMCPUID // Some CPU ID registers readable at user-level
   218  	ASIMD    // Advanced SIMD
   219  	ASIMDDP  // SIMD Dot Product
   220  	ASIMDHP  // Advanced SIMD half-precision floating point
   221  	ASIMDRDM // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
   222  	ATOMICS  // Large System Extensions (LSE)
   223  	CRC32    // CRC32/CRC32C instructions
   224  	DCPOP    // Data cache clean to Point of Persistence (DC CVAP)
   225  	EVTSTRM  // Generic timer
   226  	FCMA     // Floating-point complex number addition and multiplication
   227  	FP       // Single-precision and double-precision floating point
   228  	FPHP     // Half-precision floating point
   229  	GPA      // Generic Pointer Authentication
   230  	JSCVT    // Javascript-style double->int convert (FJCVTZS)
   231  	LRCPC    // Weaker release consistency (LDAPR, etc)
   232  	PMULL    // Polynomial Multiply instructions (PMULL/PMULL2)
   233  	SHA1     // SHA-1 instructions (SHA1C, etc)
   234  	SHA2     // SHA-2 instructions (SHA256H, etc)
   235  	SHA3     // SHA-3 instructions (EOR3, RAX1, XAR, BCAX)
   236  	SHA512   // SHA512 instructions
   237  	SM3      // SM3 instructions
   238  	SM4      // SM4 instructions
   239  	SVE      // Scalable Vector Extension
   240  	// Keep it last. It automatically defines the size of the flagSet array.
   241  	lastID
   242  
   243  	firstID FeatureID = UNKNOWN + 1 // 0: lower bound used when iterating over feature IDs
   244  )
   245  
   246  // CPUInfo contains information about the detected system CPU.
   247  type CPUInfo struct {
   248  	BrandName      string  // Brand name reported by the CPU
   249  	VendorID       Vendor  // Comparable CPU vendor ID
   250  	VendorString   string  // Raw vendor string.
   251  	featureSet     flagSet // Features of the CPU
   252  	PhysicalCores  int     // Number of physical processor cores in your CPU. Will be 0 if undetectable.
   253  	ThreadsPerCore int     // Number of threads per physical core. Will be 1 if undetectable.
   254  	LogicalCores   int     // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
   255  	Family         int     // CPU family number
   256  	Model          int     // CPU model number
   257  	Stepping       int     // CPU stepping info
   258  	CacheLine      int     // Cache line size in bytes. Will be 0 if undetectable.
   259  	Hz             int64   // Clock speed, if known, 0 otherwise. Will attempt to contain base clock speed.
   260  	BoostFreq      int64   // Max clock speed, if known, 0 otherwise
   261  	Cache          struct { // Cache sizes; Detect presets every field to -1 before detection runs
   262  		L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
   263  		L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
   264  		L2  int // L2 Cache (per core or shared). Will be -1 if undetected
   265  		L3  int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected
   266  	}
   267  	SGX       SGXSupport // SGX details; the SGXSupport type is declared elsewhere
   268  	maxFunc   uint32     // LogicalCPU requires this to be >= 1 before issuing CPUID leaf 1; presumably the highest standard CPUID leaf - confirm
   269  	maxExFunc uint32     // presumably the highest extended CPUID leaf - confirm against the code that sets it
   270  }
   271  
   272  var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)        // CPUID hook, called with a leaf number (e.g. cpuid(1)); nil here, assigned elsewhere
   273  var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32) // CPUID hook taking a second operand, e.g. cpuidex(0xb, 1); nil here, assigned elsewhere
   274  var xgetbv func(index uint32) (eax, edx uint32)              // XGETBV hook; nil here, assigned elsewhere
   275  var rdtscpAsm func() (eax, ebx, ecx, edx uint32)             // RDTSCP hook: RTCounter combines eax (low) and edx (high), Ia32TscAux returns ecx
   276  var darwinHasAVX512 = func() bool { return false }           // default reports false; presumably replaced on darwin builds - confirm
   277  
   278  // CPU contains information about the CPU as detected on startup,
   279  // or when Detect last was called.
   280  //
   281  // Use this as the primary entry point to your data.
   282  var CPU CPUInfo
   283  
   284  func init() { // runs at package load: prepare, then fill the exported CPU variable
   285  	initCPU() // defined elsewhere; presumably installs the cpuid/cpuidex/xgetbv/rdtscpAsm hooks - confirm
   286  	Detect()  // must come after initCPU; populates CPU
   287  }
   288  
   289  // Detect will re-detect current CPU info.
   290  // This will replace the content of the exported CPU variable.
   291  //
   292  // Unless you expect the CPU to change while you are running your program
   293  // you should not need to call this function.
   294  // If you call this, you must ensure that no other goroutine is accessing the
   295  // exported CPU variable.
   296  func Detect() {
   297  	// Set defaults
   298  	CPU.ThreadsPerCore = 1
   299  	CPU.Cache.L1I = -1
   300  	CPU.Cache.L1D = -1
   301  	CPU.Cache.L2 = -1
   302  	CPU.Cache.L3 = -1
   303  	safe := true
   304  	if detectArmFlag != nil {
   305  		safe = !*detectArmFlag
   306  	}
   307  	addInfo(&CPU, safe)
   308  	if displayFeats != nil && *displayFeats {
   309  		fmt.Println("cpu features:", strings.Join(CPU.FeatureSet(), ","))
   310  		// Exit with non-zero so tests will print value.
   311  		os.Exit(1)
   312  	}
   313  	if disableFlag != nil {
   314  		s := strings.Split(*disableFlag, ",")
   315  		for _, feat := range s {
   316  			feat := ParseFeature(strings.TrimSpace(feat))
   317  			if feat != UNKNOWN {
   318  				CPU.featureSet.unset(feat)
   319  			}
   320  		}
   321  	}
   322  }
   323  
   324  // DetectARM will detect ARM64 features into the exported CPU variable.
   325  // This is NOT done automatically since it can potentially crash
   326  // if the OS does not handle the command.
   327  // If in the future this can be done safely this function may not
   328  // do anything.
   329  func DetectARM() {
   330  	addInfo(&CPU, false) // same call Detect makes, but with its "safe" argument forced to false
   331  }
   332  
   333  var detectArmFlag *bool // nil until Flags is called; backs the "cpu.arm" flag
   334  var displayFeats *bool  // nil until Flags is called; backs the "cpu.features" flag
   335  var disableFlag *string // nil until Flags is called; backs the "cpu.disable" flag
   336  
   337  // Flags registers the cpu.disable, cpu.features and cpu.arm command-line flags.
   338  // This must be called *before* flag.Parse AND
   339  // Detect must be called after the flags have been parsed.
   340  // Note that this means that any detection used in init() functions
   341  // will not contain these flags.
   342  func Flags() {
   343  	disableFlag = flag.String("cpu.disable", "", "disable cpu features; comma separated list")
   344  	displayFeats = flag.Bool("cpu.features", false, "lists cpu features and exits")
   345  	detectArmFlag = flag.Bool("cpu.arm", false, "allow ARM features to be detected; can potentially crash")
   346  }
   347  
   348  // Supports returns whether the CPU supports all of the requested features.
   349  func (c CPUInfo) Supports(ids ...FeatureID) bool {
   350  	for _, id := range ids {
   351  		if !c.featureSet.inSet(id) {
   352  			return false
   353  		}
   354  	}
   355  	return true
   356  }
   357  
   358  // Has reports whether the single feature id is present in the receiver's feature set.
   359  // Should be inlined by the compiler.
   360  func (c CPUInfo) Has(id FeatureID) bool {
   361  	return c.featureSet.inSet(id)
   362  }
   363  
   364  // AnyOf returns whether the CPU supports one or more of the requested features.
   365  func (c CPUInfo) AnyOf(ids ...FeatureID) bool {
   366  	for _, id := range ids {
   367  		if c.featureSet.inSet(id) {
   368  			return true
   369  		}
   370  	}
   371  	return false
   372  }
   373  
   374  // Feature sets checked by X64Level for levels 1-4; each list repeats the previous level's features and adds more. See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
   375  var level1Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2)
   376  var level2Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3)
   377  var level3Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE)
   378  var level4Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL)
   379  
   380  // X64Level returns the microarchitecture level detected on the CPU.
   381  // If features are lacking or non x64 mode, 0 is returned.
   382  // See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
   383  func (c CPUInfo) X64Level() int {
   384  	if c.featureSet.hasSet(level4Features) {
   385  		return 4
   386  	}
   387  	if c.featureSet.hasSet(level3Features) {
   388  		return 3
   389  	}
   390  	if c.featureSet.hasSet(level2Features) {
   391  		return 2
   392  	}
   393  	if c.featureSet.hasSet(level1Features) {
   394  		return 1
   395  	}
   396  	return 0
   397  }
   398  
   399  // Disable will disable one or several features.
   400  func (c *CPUInfo) Disable(ids ...FeatureID) bool {
   401  	for _, id := range ids {
   402  		c.featureSet.unset(id)
   403  	}
   404  	return true
   405  }
   406  
   407  // Enable will disable one or several features even if they were undetected.
   408  // This is of course not recommended for obvious reasons.
   409  func (c *CPUInfo) Enable(ids ...FeatureID) bool {
   410  	for _, id := range ids {
   411  		c.featureSet.set(id)
   412  	}
   413  	return true
   414  }
   415  
   416  // IsVendor returns true if the receiver's VendorID equals v.
   417  func (c CPUInfo) IsVendor(v Vendor) bool {
   418  	return c.VendorID == v
   419  }
   420  
   421  // FeatureSet returns all available features as strings.
   422  func (c CPUInfo) FeatureSet() []string {
   423  	s := make([]string, 0, c.featureSet.nEnabled())
   424  	s = append(s, c.featureSet.Strings()...)
   425  	return s
   426  }
   427  
   428  // RTCounter returns the 64-bit time-stamp counter
   429  // Uses the RDTSCP instruction. The value 0 is returned
   430  // if the CPU does not support the instruction.
   431  func (c CPUInfo) RTCounter() uint64 {
   432  	if !c.Supports(RDTSCP) {
   433  		return 0
   434  	}
   435  	a, _, _, d := rdtscpAsm()
   436  	return uint64(a) | (uint64(d) << 32)
   437  }
   438  
   439  // Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
   440  // This variable is OS dependent, but on Linux contains information
   441  // about the current cpu/core the code is running on.
   442  // If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
   443  func (c CPUInfo) Ia32TscAux() uint32 {
   444  	if !c.Supports(RDTSCP) {
   445  		return 0
   446  	}
   447  	_, _, ecx, _ := rdtscpAsm()
   448  	return ecx
   449  }
   450  
   451  // LogicalCPU will return the Logical CPU the code is currently executing on.
   452  // This is likely to change when the OS re-schedules the running thread
   453  // to another CPU.
   454  // If the current core cannot be detected, -1 will be returned.
   455  func (c CPUInfo) LogicalCPU() int {
   456  	if c.maxFunc < 1 {
   457  		return -1
   458  	}
   459  	_, ebx, _, _ := cpuid(1)
   460  	return int(ebx >> 24)
   461  }
   462  
   463  // frequencies tries to compute the clock speed of the CPU. If leaf 15 is
   464  // supported, use it, otherwise parse the brand string. Yes, really.
   465  func (c *CPUInfo) frequencies() {
   466  	c.Hz, c.BoostFreq = 0, 0
   467  	mfi := maxFunctionID()
   468  	if mfi >= 0x15 {
   469  		eax, ebx, ecx, _ := cpuid(0x15)
   470  		if eax != 0 && ebx != 0 && ecx != 0 {
   471  			c.Hz = (int64(ecx) * int64(ebx)) / int64(eax)
   472  		}
   473  	}
   474  	if mfi >= 0x16 {
   475  		a, b, _, _ := cpuid(0x16)
   476  		// Base...
   477  		if a&0xffff > 0 {
   478  			c.Hz = int64(a&0xffff) * 1_000_000
   479  		}
   480  		// Boost...
   481  		if b&0xffff > 0 {
   482  			c.BoostFreq = int64(b&0xffff) * 1_000_000
   483  		}
   484  	}
   485  	if c.Hz > 0 {
   486  		return
   487  	}
   488  
   489  	// computeHz determines the official rated speed of a CPU from its brand
   490  	// string. This insanity is *actually the official documented way to do
   491  	// this according to Intel*, prior to leaf 0x15 existing. The official
   492  	// documentation only shows this working for exactly `x.xx` or `xxxx`
   493  	// cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other
   494  	// sizes.
   495  	model := c.BrandName
   496  	hz := strings.LastIndex(model, "Hz")
   497  	if hz < 3 {
   498  		return
   499  	}
   500  	var multiplier int64
   501  	switch model[hz-1] {
   502  	case 'M':
   503  		multiplier = 1000 * 1000
   504  	case 'G':
   505  		multiplier = 1000 * 1000 * 1000
   506  	case 'T':
   507  		multiplier = 1000 * 1000 * 1000 * 1000
   508  	}
   509  	if multiplier == 0 {
   510  		return
   511  	}
   512  	freq := int64(0)
   513  	divisor := int64(0)
   514  	decimalShift := int64(1)
   515  	var i int
   516  	for i = hz - 2; i >= 0 && model[i] != ' '; i-- {
   517  		if model[i] >= '0' && model[i] <= '9' {
   518  			freq += int64(model[i]-'0') * decimalShift
   519  			decimalShift *= 10
   520  		} else if model[i] == '.' {
   521  			if divisor != 0 {
   522  				return
   523  			}
   524  			divisor = decimalShift
   525  		} else {
   526  			return
   527  		}
   528  	}
   529  	// we didn't find a space
   530  	if i < 0 {
   531  		return
   532  	}
   533  	if divisor != 0 {
   534  		c.Hz = (freq * multiplier) / divisor
   535  		return
   536  	}
   537  	c.Hz = freq * multiplier
   538  }
   539  
   540  // VM Will return true if the cpu id indicates we are in
   541  // a virtual machine.
   542  func (c CPUInfo) VM() bool {
   543  	return CPU.featureSet.inSet(HYPERVISOR)
   544  }
   545  
   546  // flags is one 64-bit word of detected cpu features and characteristics, one bit per feature
   547  type flags uint64
   548  
   549  // log2(bits_in_uint64)
   550  const flagBitsLog2 = 6
   551  const flagBits = 1 << flagBitsLog2 // 64: number of feature bits held by one flags word
   552  const flagMask = flagBits - 1      // 63: selects the bit position of a feature within its word
   553  
   554  // flagSet contains detected cpu features and characteristics in an array of flags; its length is lastID/flagBits rounded up
   555  type flagSet [(lastID + flagMask) / flagBits]flags
   556  
   557  func (s flagSet) inSet(feat FeatureID) bool {
   558  	return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0
   559  }
   560  
   561  func (s *flagSet) set(feat FeatureID) {
   562  	s[feat>>flagBitsLog2] |= 1 << (feat & flagMask)
   563  }
   564  
   565  // setIf will set a feature if boolean is true.
   566  func (s *flagSet) setIf(cond bool, features ...FeatureID) {
   567  	if cond {
   568  		for _, offset := range features {
   569  			s[offset>>flagBitsLog2] |= 1 << (offset & flagMask)
   570  		}
   571  	}
   572  }
   573  
   574  func (s *flagSet) unset(offset FeatureID) {
   575  	bit := flags(1 << (offset & flagMask))
   576  	s[offset>>flagBitsLog2] = s[offset>>flagBitsLog2] & ^bit
   577  }
   578  
   579  // or with another flagset.
   580  func (s *flagSet) or(other flagSet) {
   581  	for i, v := range other[:] {
   582  		s[i] |= v
   583  	}
   584  }
   585  
   586  // hasSet returns whether all features are present.
   587  func (s flagSet) hasSet(other flagSet) bool {
   588  	for i, v := range other[:] {
   589  		if s[i]&v != v {
   590  			return false
   591  		}
   592  	}
   593  	return true
   594  }
   595  
   596  // nEnabled will return the number of enabled flags.
   597  func (s flagSet) nEnabled() (n int) {
   598  	for _, v := range s[:] {
   599  		n += bits.OnesCount64(uint64(v))
   600  	}
   601  	return n
   602  }
   603  
   604  func flagSetWith(feat ...FeatureID) flagSet {
   605  	var res flagSet
   606  	for _, f := range feat {
   607  		res.set(f)
   608  	}
   609  	return res
   610  }
   611  
   612  // ParseFeature will parse the string and return the ID of the matching feature.
   613  // Will return UNKNOWN if not found.
   614  func ParseFeature(s string) FeatureID {
   615  	s = strings.ToUpper(s)
   616  	for i := firstID; i < lastID; i++ {
   617  		if i.String() == s {
   618  			return i
   619  		}
   620  	}
   621  	return UNKNOWN
   622  }
   623  
   624  // Strings returns an array of the detected features for FlagsSet.
   625  func (s flagSet) Strings() []string {
   626  	if len(s) == 0 {
   627  		return []string{""}
   628  	}
   629  	r := make([]string, 0)
   630  	for i := firstID; i < lastID; i++ {
   631  		if s.inSet(i) {
   632  			r = append(r, i.String())
   633  		}
   634  	}
   635  	return r
   636  }
   637  
   638  func maxExtendedFunction() uint32 {
   639  	eax, _, _, _ := cpuid(0x80000000)
   640  	return eax
   641  }
   642  
   643  func maxFunctionID() uint32 {
   644  	a, _, _, _ := cpuid(0)
   645  	return a
   646  }
   647  
   648  func brandName() string {
   649  	if maxExtendedFunction() >= 0x80000004 {
   650  		v := make([]uint32, 0, 48)
   651  		for i := uint32(0); i < 3; i++ {
   652  			a, b, c, d := cpuid(0x80000002 + i)
   653  			v = append(v, a, b, c, d)
   654  		}
   655  		return strings.Trim(string(valAsString(v...)), " ")
   656  	}
   657  	return "unknown"
   658  }
   659  
   660  func threadsPerCore() int {
   661  	mfi := maxFunctionID()
   662  	vend, _ := vendorID()
   663  
   664  	if mfi < 0x4 || (vend != Intel && vend != AMD) {
   665  		return 1
   666  	}
   667  
   668  	if mfi < 0xb {
   669  		if vend != Intel {
   670  			return 1
   671  		}
   672  		_, b, _, d := cpuid(1)
   673  		if (d & (1 << 28)) != 0 {
   674  			// v will contain logical core count
   675  			v := (b >> 16) & 255
   676  			if v > 1 {
   677  				a4, _, _, _ := cpuid(4)
   678  				// physical cores
   679  				v2 := (a4 >> 26) + 1
   680  				if v2 > 0 {
   681  					return int(v) / int(v2)
   682  				}
   683  			}
   684  		}
   685  		return 1
   686  	}
   687  	_, b, _, _ := cpuidex(0xb, 0)
   688  	if b&0xffff == 0 {
   689  		if vend == AMD {
   690  			// Workaround for AMD returning 0, assume 2 if >= Zen 2
   691  			// It will be more correct than not.
   692  			fam, _, _ := familyModel()
   693  			_, _, _, d := cpuid(1)
   694  			if (d&(1<<28)) != 0 && fam >= 23 {
   695  				return 2
   696  			}
   697  		}
   698  		return 1
   699  	}
   700  	return int(b & 0xffff)
   701  }
   702  
   703  func logicalCores() int {
   704  	mfi := maxFunctionID()
   705  	v, _ := vendorID()
   706  	switch v {
   707  	case Intel:
   708  		// Use this on old Intel processors
   709  		if mfi < 0xb {
   710  			if mfi < 1 {
   711  				return 0
   712  			}
   713  			// CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
   714  			// that can be assigned to logical processors in a physical package.
   715  			// The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
   716  			_, ebx, _, _ := cpuid(1)
   717  			logical := (ebx >> 16) & 0xff
   718  			return int(logical)
   719  		}
   720  		_, b, _, _ := cpuidex(0xb, 1)
   721  		return int(b & 0xffff)
   722  	case AMD, Hygon:
   723  		_, b, _, _ := cpuid(1)
   724  		return int((b >> 16) & 0xff)
   725  	default:
   726  		return 0
   727  	}
   728  }
   729  
   730  func familyModel() (family, model, stepping int) {
   731  	if maxFunctionID() < 0x1 {
   732  		return 0, 0, 0
   733  	}
   734  	eax, _, _, _ := cpuid(1)
   735  	// If BaseFamily[3:0] is less than Fh then ExtendedFamily[7:0] is reserved and Family is equal to BaseFamily[3:0].
   736  	family = int((eax >> 8) & 0xf)
   737  	extFam := family == 0x6 // Intel is 0x6, needs extended model.
   738  	if family == 0xf {
   739  		// Add ExtFamily
   740  		family += int((eax >> 20) & 0xff)
   741  		extFam = true
   742  	}
   743  	// If BaseFamily[3:0] is less than 0Fh then ExtendedModel[3:0] is reserved and Model is equal to BaseModel[3:0].
   744  	model = int((eax >> 4) & 0xf)
   745  	if extFam {
   746  		// Add ExtModel
   747  		model += int((eax >> 12) & 0xf0)
   748  	}
   749  	stepping = int(eax & 0xf)
   750  	return family, model, stepping
   751  }
   752  
   753  func physicalCores() int {
   754  	v, _ := vendorID()
   755  	switch v {
   756  	case Intel:
   757  		return logicalCores() / threadsPerCore()
   758  	case AMD, Hygon:
   759  		lc := logicalCores()
   760  		tpc := threadsPerCore()
   761  		if lc > 0 && tpc > 0 {
   762  			return lc / tpc
   763  		}
   764  
   765  		// The following is inaccurate on AMD EPYC 7742 64-Core Processor
   766  		if maxExtendedFunction() >= 0x80000008 {
   767  			_, _, c, _ := cpuid(0x80000008)
   768  			if c&0xff > 0 {
   769  				return int(c&0xff) + 1
   770  			}
   771  		}
   772  	}
   773  	return 0
   774  }
   775  
   776  // Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
   777  var vendorMapping = map[string]Vendor{
   778  	"AMDisbetter!": AMD,
   779  	"AuthenticAMD": AMD,
   780  	"CentaurHauls": VIA,
   781  	"GenuineIntel": Intel,
   782  	"TransmetaCPU": Transmeta,
   783  	"GenuineTMx86": Transmeta,
   784  	"Geode by NSC": NSC,
   785  	"VIA VIA VIA ": VIA,
   786  	"KVMKVMKVMKVM": KVM,
   787  	"Microsoft Hv": MSVM,
   788  	"VMwareVMware": VMware,
   789  	"XenVMMXenVMM": XenHVM,
   790  	"bhyve bhyve ": Bhyve,
   791  	"HygonGenuine": Hygon,
   792  	"Vortex86 SoC": SiS,
   793  	"SiS SiS SiS ": SiS,
   794  	"RiseRiseRise": SiS,
   795  	"Genuine  RDC": RDC,
   796  }
   797  
   798  func vendorID() (Vendor, string) {
   799  	_, b, c, d := cpuid(0)
   800  	v := string(valAsString(b, d, c))
   801  	vend, ok := vendorMapping[v]
   802  	if !ok {
   803  		return VendorUnknown, v
   804  	}
   805  	return vend, v
   806  }
   807  
   808  func cacheLine() int {
   809  	if maxFunctionID() < 0x1 {
   810  		return 0
   811  	}
   812  
   813  	_, ebx, _, _ := cpuid(1)
   814  	cache := (ebx & 0xff00) >> 5 // cflush size
   815  	if cache == 0 && maxExtendedFunction() >= 0x80000006 {
   816  		_, _, ecx, _ := cpuid(0x80000006)
   817  		cache = ecx & 0xff // cacheline size
   818  	}
   819  	// TODO: Read from Cache and TLB Information
   820  	return int(cache)
   821  }
   822  
   823  func (c *CPUInfo) cacheSize() {
   824  	c.Cache.L1D = -1
   825  	c.Cache.L1I = -1
   826  	c.Cache.L2 = -1
   827  	c.Cache.L3 = -1
   828  	vendor, _ := vendorID()
   829  	switch vendor {
   830  	case Intel:
   831  		if maxFunctionID() < 4 {
   832  			return
   833  		}
   834  		c.Cache.L1I, c.Cache.L1D, c.Cache.L2, c.Cache.L3 = 0, 0, 0, 0
   835  		for i := uint32(0); ; i++ {
   836  			eax, ebx, ecx, _ := cpuidex(4, i)
   837  			cacheType := eax & 15
   838  			if cacheType == 0 {
   839  				break
   840  			}
   841  			cacheLevel := (eax >> 5) & 7
   842  			coherency := int(ebx&0xfff) + 1
   843  			partitions := int((ebx>>12)&0x3ff) + 1
   844  			associativity := int((ebx>>22)&0x3ff) + 1
   845  			sets := int(ecx) + 1
   846  			size := associativity * partitions * coherency * sets
   847  			switch cacheLevel {
   848  			case 1:
   849  				if cacheType == 1 {
   850  					// 1 = Data Cache
   851  					c.Cache.L1D = size
   852  				} else if cacheType == 2 {
   853  					// 2 = Instruction Cache
   854  					c.Cache.L1I = size
   855  				} else {
   856  					if c.Cache.L1D < 0 {
   857  						c.Cache.L1I = size
   858  					}
   859  					if c.Cache.L1I < 0 {
   860  						c.Cache.L1I = size
   861  					}
   862  				}
   863  			case 2:
   864  				c.Cache.L2 = size
   865  			case 3:
   866  				c.Cache.L3 = size
   867  			}
   868  		}
   869  	case AMD, Hygon:
   870  		// Untested.
   871  		if maxExtendedFunction() < 0x80000005 {
   872  			return
   873  		}
   874  		_, _, ecx, edx := cpuid(0x80000005)
   875  		c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024)
   876  		c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024)
   877  
   878  		if maxExtendedFunction() < 0x80000006 {
   879  			return
   880  		}
   881  		_, _, ecx, _ = cpuid(0x80000006)
   882  		c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
   883  
   884  		// CPUID Fn8000_001D_EAX_x[N:0] Cache Properties
   885  		if maxExtendedFunction() < 0x8000001D {
   886  			return
   887  		}
   888  
   889  		// Xen Hypervisor is buggy and returns the same entry no matter ECX value.
   890  		// Hack: When we encounter the same entry 100 times we break.
   891  		nSame := 0
   892  		var last uint32
   893  		for i := uint32(0); i < math.MaxUint32; i++ {
   894  			eax, ebx, ecx, _ := cpuidex(0x8000001D, i)
   895  
   896  			level := (eax >> 5) & 7
   897  			cacheNumSets := ecx + 1
   898  			cacheLineSize := 1 + (ebx & 2047)
   899  			cachePhysPartitions := 1 + ((ebx >> 12) & 511)
   900  			cacheNumWays := 1 + ((ebx >> 22) & 511)
   901  
   902  			typ := eax & 15
   903  			size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays)
   904  			if typ == 0 {
   905  				return
   906  			}
   907  
   908  			// Check for the same value repeated.
   909  			comb := eax ^ ebx ^ ecx
   910  			if comb == last {
   911  				nSame++
   912  				if nSame == 100 {
   913  					return
   914  				}
   915  			}
   916  			last = comb
   917  
   918  			switch level {
   919  			case 1:
   920  				switch typ {
   921  				case 1:
   922  					// Data cache
   923  					c.Cache.L1D = size
   924  				case 2:
   925  					// Inst cache
   926  					c.Cache.L1I = size
   927  				default:
   928  					if c.Cache.L1D < 0 {
   929  						c.Cache.L1I = size
   930  					}
   931  					if c.Cache.L1I < 0 {
   932  						c.Cache.L1I = size
   933  					}
   934  				}
   935  			case 2:
   936  				c.Cache.L2 = size
   937  			case 3:
   938  				c.Cache.L3 = size
   939  			}
   940  		}
   941  	}
   942  }
   943  
   944  type SGXEPCSection struct {
   945  	BaseAddress uint64
   946  	EPCSize     uint64
   947  }
   948  
   949  type SGXSupport struct {
   950  	Available           bool
   951  	LaunchControl       bool
   952  	SGX1Supported       bool
   953  	SGX2Supported       bool
   954  	MaxEnclaveSizeNot64 int64
   955  	MaxEnclaveSize64    int64
   956  	EPCSections         []SGXEPCSection
   957  }
   958  
   959  func hasSGX(available, lc bool) (rval SGXSupport) {
   960  	rval.Available = available
   961  
   962  	if !available {
   963  		return
   964  	}
   965  
   966  	rval.LaunchControl = lc
   967  
   968  	a, _, _, d := cpuidex(0x12, 0)
   969  	rval.SGX1Supported = a&0x01 != 0
   970  	rval.SGX2Supported = a&0x02 != 0
   971  	rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF)     // pow 2
   972  	rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2
   973  	rval.EPCSections = make([]SGXEPCSection, 0)
   974  
   975  	for subleaf := uint32(2); subleaf < 2+8; subleaf++ {
   976  		eax, ebx, ecx, edx := cpuidex(0x12, subleaf)
   977  		leafType := eax & 0xf
   978  
   979  		if leafType == 0 {
   980  			// Invalid subleaf, stop iterating
   981  			break
   982  		} else if leafType == 1 {
   983  			// EPC Section subleaf
   984  			baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32)
   985  			size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32)
   986  
   987  			section := SGXEPCSection{BaseAddress: baseAddress, EPCSize: size}
   988  			rval.EPCSections = append(rval.EPCSections, section)
   989  		}
   990  	}
   991  
   992  	return
   993  }
   994  
   995  func support() flagSet {
   996  	var fs flagSet
   997  	mfi := maxFunctionID()
   998  	vend, _ := vendorID()
   999  	if mfi < 0x1 {
  1000  		return fs
  1001  	}
  1002  	family, model, _ := familyModel()
  1003  
  1004  	_, _, c, d := cpuid(1)
  1005  	fs.setIf((d&(1<<0)) != 0, X87)
  1006  	fs.setIf((d&(1<<8)) != 0, CMPXCHG8)
  1007  	fs.setIf((d&(1<<11)) != 0, SYSEE)
  1008  	fs.setIf((d&(1<<15)) != 0, CMOV)
  1009  	fs.setIf((d&(1<<23)) != 0, MMX)
  1010  	fs.setIf((d&(1<<24)) != 0, FXSR)
  1011  	fs.setIf((d&(1<<25)) != 0, FXSROPT)
  1012  	fs.setIf((d&(1<<25)) != 0, SSE)
  1013  	fs.setIf((d&(1<<26)) != 0, SSE2)
  1014  	fs.setIf((c&1) != 0, SSE3)
  1015  	fs.setIf((c&(1<<5)) != 0, VMX)
  1016  	fs.setIf((c&(1<<9)) != 0, SSSE3)
  1017  	fs.setIf((c&(1<<19)) != 0, SSE4)
  1018  	fs.setIf((c&(1<<20)) != 0, SSE42)
  1019  	fs.setIf((c&(1<<25)) != 0, AESNI)
  1020  	fs.setIf((c&(1<<1)) != 0, CLMUL)
  1021  	fs.setIf(c&(1<<22) != 0, MOVBE)
  1022  	fs.setIf(c&(1<<23) != 0, POPCNT)
  1023  	fs.setIf(c&(1<<30) != 0, RDRAND)
  1024  
  1025  	// This bit has been reserved by Intel & AMD for use by hypervisors,
  1026  	// and indicates the presence of a hypervisor.
  1027  	fs.setIf(c&(1<<31) != 0, HYPERVISOR)
  1028  	fs.setIf(c&(1<<29) != 0, F16C)
  1029  	fs.setIf(c&(1<<13) != 0, CX16)
  1030  
  1031  	if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 {
  1032  		fs.setIf(threadsPerCore() > 1, HTT)
  1033  	}
  1034  	if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 {
  1035  		fs.setIf(threadsPerCore() > 1, HTT)
  1036  	}
  1037  	fs.setIf(c&1<<26 != 0, XSAVE)
  1038  	fs.setIf(c&1<<27 != 0, OSXSAVE)
  1039  	// Check XGETBV/XSAVE (26), OXSAVE (27) and AVX (28) bits
  1040  	const avxCheck = 1<<26 | 1<<27 | 1<<28
  1041  	if c&avxCheck == avxCheck {
  1042  		// Check for OS support
  1043  		eax, _ := xgetbv(0)
  1044  		if (eax & 0x6) == 0x6 {
  1045  			fs.set(AVX)
  1046  			switch vend {
  1047  			case Intel:
  1048  				// Older than Haswell.
  1049  				fs.setIf(family == 6 && model < 60, AVXSLOW)
  1050  			case AMD:
  1051  				// Older than Zen 2
  1052  				fs.setIf(family < 23 || (family == 23 && model < 49), AVXSLOW)
  1053  			}
  1054  		}
  1055  	}
  1056  	// FMA3 can be used with SSE registers, so no OS support is strictly needed.
  1057  	// fma3 and OSXSAVE needed.
  1058  	const fma3Check = 1<<12 | 1<<27
  1059  	fs.setIf(c&fma3Check == fma3Check, FMA3)
  1060  
  1061  	// Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
  1062  	if mfi >= 7 {
  1063  		_, ebx, ecx, edx := cpuidex(7, 0)
  1064  		if fs.inSet(AVX) && (ebx&0x00000020) != 0 {
  1065  			fs.set(AVX2)
  1066  		}
  1067  		// CPUID.(EAX=7, ECX=0).EBX
  1068  		if (ebx & 0x00000008) != 0 {
  1069  			fs.set(BMI1)
  1070  			fs.setIf((ebx&0x00000100) != 0, BMI2)
  1071  		}
  1072  		fs.setIf(ebx&(1<<2) != 0, SGX)
  1073  		fs.setIf(ebx&(1<<4) != 0, HLE)
  1074  		fs.setIf(ebx&(1<<9) != 0, ERMS)
  1075  		fs.setIf(ebx&(1<<11) != 0, RTM)
  1076  		fs.setIf(ebx&(1<<14) != 0, MPX)
  1077  		fs.setIf(ebx&(1<<18) != 0, RDSEED)
  1078  		fs.setIf(ebx&(1<<19) != 0, ADX)
  1079  		fs.setIf(ebx&(1<<29) != 0, SHA)
  1080  
  1081  		// CPUID.(EAX=7, ECX=0).ECX
  1082  		fs.setIf(ecx&(1<<5) != 0, WAITPKG)
  1083  		fs.setIf(ecx&(1<<7) != 0, CETSS)
  1084  		fs.setIf(ecx&(1<<8) != 0, GFNI)
  1085  		fs.setIf(ecx&(1<<9) != 0, VAES)
  1086  		fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ)
  1087  		fs.setIf(ecx&(1<<13) != 0, TME)
  1088  		fs.setIf(ecx&(1<<25) != 0, CLDEMOTE)
  1089  		fs.setIf(ecx&(1<<27) != 0, MOVDIRI)
  1090  		fs.setIf(ecx&(1<<28) != 0, MOVDIR64B)
  1091  		fs.setIf(ecx&(1<<29) != 0, ENQCMD)
  1092  		fs.setIf(ecx&(1<<30) != 0, SGXLC)
  1093  
  1094  		// CPUID.(EAX=7, ECX=0).EDX
  1095  		fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT)
  1096  		fs.setIf(edx&(1<<14) != 0, SERIALIZE)
  1097  		fs.setIf(edx&(1<<16) != 0, TSXLDTRK)
  1098  		fs.setIf(edx&(1<<18) != 0, PCONFIG)
  1099  		fs.setIf(edx&(1<<20) != 0, CETIBT)
  1100  		fs.setIf(edx&(1<<26) != 0, IBPB)
  1101  		fs.setIf(edx&(1<<27) != 0, STIBP)
  1102  
  1103  		// CPUID.(EAX=7, ECX=1)
  1104  		eax1, _, _, _ := cpuidex(7, 1)
  1105  		fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI)
  1106  		fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL)
  1107  		fs.setIf(eax1&(1<<11) != 0, STOSB_SHORT)
  1108  		fs.setIf(eax1&(1<<12) != 0, CMPSB_SCADBS_SHORT)
  1109  		fs.setIf(eax1&(1<<22) != 0, HRESET)
  1110  		fs.setIf(eax1&(1<<26) != 0, LAM)
  1111  
  1112  		// Only detect AVX-512 features if XGETBV is supported
  1113  		if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
  1114  			// Check for OS support
  1115  			eax, _ := xgetbv(0)
  1116  
  1117  			// Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
  1118  			// ZMM16-ZMM31 state are enabled by OS)
  1119  			/// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS).
  1120  			hasAVX512 := (eax>>5)&7 == 7 && (eax>>1)&3 == 3
  1121  			if runtime.GOOS == "darwin" {
  1122  				hasAVX512 = fs.inSet(AVX) && darwinHasAVX512()
  1123  			}
  1124  			if hasAVX512 {
  1125  				fs.setIf(ebx&(1<<16) != 0, AVX512F)
  1126  				fs.setIf(ebx&(1<<17) != 0, AVX512DQ)
  1127  				fs.setIf(ebx&(1<<21) != 0, AVX512IFMA)
  1128  				fs.setIf(ebx&(1<<26) != 0, AVX512PF)
  1129  				fs.setIf(ebx&(1<<27) != 0, AVX512ER)
  1130  				fs.setIf(ebx&(1<<28) != 0, AVX512CD)
  1131  				fs.setIf(ebx&(1<<30) != 0, AVX512BW)
  1132  				fs.setIf(ebx&(1<<31) != 0, AVX512VL)
  1133  				// ecx
  1134  				fs.setIf(ecx&(1<<1) != 0, AVX512VBMI)
  1135  				fs.setIf(ecx&(1<<6) != 0, AVX512VBMI2)
  1136  				fs.setIf(ecx&(1<<11) != 0, AVX512VNNI)
  1137  				fs.setIf(ecx&(1<<12) != 0, AVX512BITALG)
  1138  				fs.setIf(ecx&(1<<14) != 0, AVX512VPOPCNTDQ)
  1139  				// edx
  1140  				fs.setIf(edx&(1<<8) != 0, AVX512VP2INTERSECT)
  1141  				fs.setIf(edx&(1<<22) != 0, AMXBF16)
  1142  				fs.setIf(edx&(1<<23) != 0, AVX512FP16)
  1143  				fs.setIf(edx&(1<<24) != 0, AMXTILE)
  1144  				fs.setIf(edx&(1<<25) != 0, AMXINT8)
  1145  				// eax1 = CPUID.(EAX=7, ECX=1).EAX
  1146  				fs.setIf(eax1&(1<<5) != 0, AVX512BF16)
  1147  			}
  1148  		}
  1149  	}
  1150  	// Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1)
  1151  	// EAX
  1152  	// Bit 00: XSAVEOPT is available.
  1153  	// Bit 01: Supports XSAVEC and the compacted form of XRSTOR if set.
  1154  	// Bit 02: Supports XGETBV with ECX = 1 if set.
  1155  	// Bit 03: Supports XSAVES/XRSTORS and IA32_XSS if set.
  1156  	// Bits 31 - 04: Reserved.
  1157  	// EBX
  1158  	// Bits 31 - 00: The size in bytes of the XSAVE area containing all states enabled by XCRO | IA32_XSS.
  1159  	// ECX
  1160  	// Bits 31 - 00: Reports the supported bits of the lower 32 bits of the IA32_XSS MSR. IA32_XSS[n] can be set to 1 only if ECX[n] is 1.
  1161  	// EDX?
  1162  	// Bits 07 - 00: Used for XCR0. Bit 08: PT state. Bit 09: Used for XCR0. Bits 12 - 10: Reserved. Bit 13: HWP state. Bits 31 - 14: Reserved.
  1163  	if mfi >= 0xd {
  1164  		if fs.inSet(XSAVE) {
  1165  			eax, _, _, _ := cpuidex(0xd, 1)
  1166  			fs.setIf(eax&(1<<0) != 0, XSAVEOPT)
  1167  			fs.setIf(eax&(1<<1) != 0, XSAVEC)
  1168  			fs.setIf(eax&(1<<2) != 0, XGETBV1)
  1169  			fs.setIf(eax&(1<<3) != 0, XSAVES)
  1170  		}
  1171  	}
  1172  	if maxExtendedFunction() >= 0x80000001 {
  1173  		_, _, c, d := cpuid(0x80000001)
  1174  		if (c & (1 << 5)) != 0 {
  1175  			fs.set(LZCNT)
  1176  			fs.set(POPCNT)
  1177  		}
  1178  		// ECX
  1179  		fs.setIf((c&(1<<0)) != 0, LAHF)
  1180  		fs.setIf((c&(1<<2)) != 0, SVM)
  1181  		fs.setIf((c&(1<<6)) != 0, SSE4A)
  1182  		fs.setIf((c&(1<<10)) != 0, IBS)
  1183  
  1184  		// EDX
  1185  		fs.setIf(d&(1<<11) != 0, SYSCALL)
  1186  		fs.setIf(d&(1<<20) != 0, NX)
  1187  		fs.setIf(d&(1<<22) != 0, MMXEXT)
  1188  		fs.setIf(d&(1<<23) != 0, MMX)
  1189  		fs.setIf(d&(1<<24) != 0, FXSR)
  1190  		fs.setIf(d&(1<<25) != 0, FXSROPT)
  1191  		fs.setIf(d&(1<<27) != 0, RDTSCP)
  1192  		fs.setIf(d&(1<<30) != 0, AMD3DNOWEXT)
  1193  		fs.setIf(d&(1<<31) != 0, AMD3DNOW)
  1194  
  1195  		/* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
  1196  		 * used unless the OS has AVX support. */
  1197  		if fs.inSet(AVX) {
  1198  			fs.setIf((c&0x00000800) != 0, XOP)
  1199  			fs.setIf((c&0x00010000) != 0, FMA4)
  1200  		}
  1201  
  1202  	}
  1203  	if maxExtendedFunction() >= 0x80000007 {
  1204  		_, b, _, d := cpuid(0x80000007)
  1205  		fs.setIf((b&(1<<0)) != 0, MCAOVERFLOW)
  1206  		fs.setIf((b&(1<<1)) != 0, SUCCOR)
  1207  		fs.setIf((b&(1<<2)) != 0, HWA)
  1208  		fs.setIf((d&(1<<9)) != 0, CPBOOST)
  1209  	}
  1210  
  1211  	if maxExtendedFunction() >= 0x80000008 {
  1212  		_, b, _, _ := cpuid(0x80000008)
  1213  		fs.setIf((b&(1<<9)) != 0, WBNOINVD)
  1214  		fs.setIf((b&(1<<8)) != 0, MCOMMIT)
  1215  		fs.setIf((b&(1<<13)) != 0, INT_WBINVD)
  1216  		fs.setIf((b&(1<<4)) != 0, RDPRU)
  1217  		fs.setIf((b&(1<<3)) != 0, INVLPGB)
  1218  		fs.setIf((b&(1<<1)) != 0, MSRIRC)
  1219  		fs.setIf((b&(1<<0)) != 0, CLZERO)
  1220  	}
  1221  
  1222  	if fs.inSet(SVM) && maxExtendedFunction() >= 0x8000000A {
  1223  		_, _, _, edx := cpuid(0x8000000A)
  1224  		fs.setIf((edx>>0)&1 == 1, SVMNP)
  1225  		fs.setIf((edx>>1)&1 == 1, LBRVIRT)
  1226  		fs.setIf((edx>>2)&1 == 1, SVML)
  1227  		fs.setIf((edx>>3)&1 == 1, NRIPS)
  1228  		fs.setIf((edx>>4)&1 == 1, TSCRATEMSR)
  1229  		fs.setIf((edx>>5)&1 == 1, VMCBCLEAN)
  1230  		fs.setIf((edx>>6)&1 == 1, SVMFBASID)
  1231  		fs.setIf((edx>>7)&1 == 1, SVMDA)
  1232  		fs.setIf((edx>>10)&1 == 1, SVMPF)
  1233  		fs.setIf((edx>>12)&1 == 1, SVMPFT)
  1234  	}
  1235  
  1236  	if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) {
  1237  		eax, _, _, _ := cpuid(0x8000001b)
  1238  		fs.setIf((eax>>0)&1 == 1, IBSFFV)
  1239  		fs.setIf((eax>>1)&1 == 1, IBSFETCHSAM)
  1240  		fs.setIf((eax>>2)&1 == 1, IBSOPSAM)
  1241  		fs.setIf((eax>>3)&1 == 1, IBSRDWROPCNT)
  1242  		fs.setIf((eax>>4)&1 == 1, IBSOPCNT)
  1243  		fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT)
  1244  		fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT)
  1245  		fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK)
  1246  	}
  1247  
  1248  	if maxExtendedFunction() >= 0x8000001f && vend == AMD {
  1249  		a, _, _, _ := cpuid(0x8000001f)
  1250  		fs.setIf((a>>0)&1 == 1, SME)
  1251  		fs.setIf((a>>1)&1 == 1, SEV)
  1252  		fs.setIf((a>>2)&1 == 1, MSR_PAGEFLUSH)
  1253  		fs.setIf((a>>3)&1 == 1, SEV_ES)
  1254  		fs.setIf((a>>4)&1 == 1, SEV_SNP)
  1255  		fs.setIf((a>>5)&1 == 1, VMPL)
  1256  		fs.setIf((a>>10)&1 == 1, SME_COHERENT)
  1257  		fs.setIf((a>>11)&1 == 1, SEV_64BIT)
  1258  		fs.setIf((a>>12)&1 == 1, SEV_RESTRICTED)
  1259  		fs.setIf((a>>13)&1 == 1, SEV_ALTERNATIVE)
  1260  		fs.setIf((a>>14)&1 == 1, SEV_DEBUGSWAP)
  1261  		fs.setIf((a>>15)&1 == 1, IBS_PREVENTHOST)
  1262  		fs.setIf((a>>16)&1 == 1, VTE)
  1263  		fs.setIf((a>>24)&1 == 1, VMSA_REGPROT)
  1264  	}
  1265  
  1266  	return fs
  1267  }
  1268  
  1269  func valAsString(values ...uint32) []byte {
  1270  	r := make([]byte, 4*len(values))
  1271  	for i, v := range values {
  1272  		dst := r[i*4:]
  1273  		dst[0] = byte(v & 0xff)
  1274  		dst[1] = byte((v >> 8) & 0xff)
  1275  		dst[2] = byte((v >> 16) & 0xff)
  1276  		dst[3] = byte((v >> 24) & 0xff)
  1277  		switch {
  1278  		case dst[0] == 0:
  1279  			return r[:i*4]
  1280  		case dst[1] == 0:
  1281  			return r[:i*4+1]
  1282  		case dst[2] == 0:
  1283  			return r[:i*4+2]
  1284  		case dst[3] == 0:
  1285  			return r[:i*4+3]
  1286  		}
  1287  	}
  1288  	return r
  1289  }