github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/libraries/klauspost/cpuid/cpuid.go (about)

     1  // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
     2  
     3  // Package cpuid provides information about the CPU running the current program.
     4  //
     5  // CPU features are detected on startup, and kept for fast access through the life of the application.
     6  // Currently x86 / x64 (AMD64) is supported.
     7  //
     8  // You can access the CPU information by accessing the shared CPU variable of the cpuid library.
     9  //
    10  // Package home: https://yougam/libraries/klauspost/cpuid
    11  package cpuid
    12  
    13  import "strings"
    14  
// Vendor is a representation of a CPU vendor.
// It is a small integer enumeration; the known values are the constants
// declared below (Other, Intel, AMD, ...), and CPUInfo.VendorID holds one of them.
type Vendor int
    17  
// Known CPU and hypervisor vendors.
// The values are assigned by iota, so the order of the entries must not change.
const (
	Other Vendor = iota // Returned by vendorID when the vendor string is not in vendorMapping
	Intel
	AMD
	VIA
	Transmeta
	NSC
	KVM  // Kernel-based Virtual Machine
	MSVM // Microsoft Hyper-V or Windows Virtual PC
	VMware
	XenHVM
)
    30  
// Feature bits that can be set in a Flags value.
// Each constant is a distinct single bit (1 << iota), so the entries must not
// be reordered, and the total number of entries must stay within 64 because
// Flags is a uint64.
const (
	CMOV        = 1 << iota // i686 CMOV
	NX                      // NX (No-Execute) bit
	AMD3DNOW                // AMD 3DNOW
	AMD3DNOWEXT             // AMD 3DNowExt
	MMX                     // standard MMX
	MMXEXT                  // SSE integer functions or AMD MMX ext
	SSE                     // SSE functions
	SSE2                    // P4 SSE2 functions
	SSE3                    // Prescott SSE3 functions
	SSSE3                   // Conroe SSSE3 functions
	SSE4                    // Penryn SSE4.1 functions
	SSE4A                   // AMD Barcelona microarchitecture SSE4a instructions
	SSE42                   // Nehalem SSE4.2 functions
	AVX                     // AVX functions
	AVX2                    // AVX2 functions
	FMA3                    // Intel FMA 3
	FMA4                    // Bulldozer FMA4 functions
	XOP                     // Bulldozer XOP functions
	F16C                    // Half-precision floating-point conversion
	BMI1                    // Bit Manipulation Instruction Set 1
	BMI2                    // Bit Manipulation Instruction Set 2
	TBM                     // AMD Trailing Bit Manipulation
	LZCNT                   // LZCNT instruction
	POPCNT                  // POPCNT instruction
	AESNI                   // Advanced Encryption Standard New Instructions
	CLMUL                   // Carry-less Multiplication
	HTT                     // Hyperthreading (enabled)
	HLE                     // Hardware Lock Elision
	RTM                     // Restricted Transactional Memory
	RDRAND                  // RDRAND instruction is available
	RDSEED                  // RDSEED instruction is available
	ADX                     // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	SHA                     // Intel SHA Extensions
	AVX512F                 // AVX-512 Foundation
	AVX512DQ                // AVX-512 Doubleword and Quadword Instructions
	AVX512IFMA              // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF                // AVX-512 Prefetch Instructions
	AVX512ER                // AVX-512 Exponential and Reciprocal Instructions
	AVX512CD                // AVX-512 Conflict Detection Instructions
	AVX512BW                // AVX-512 Byte and Word Instructions
	AVX512VL                // AVX-512 Vector Length Extensions
	AVX512VBMI              // AVX-512 Vector Bit Manipulation Instructions
	MPX                     // Intel MPX (Memory Protection Extensions)
	ERMS                    // Enhanced REP MOVSB/STOSB
	RDTSCP                  // RDTSCP Instruction
	CX16                    // CMPXCHG16B Instruction
	SGX                     // Software Guard Extensions

	// Performance indicators
	SSE2SLOW // SSE2 is supported, but usually not faster
	SSE3SLOW // SSE3 is supported, but usually not faster
	ATOM     // Atom processor, some SSSE3 instructions are slower
)
    85  
// flagNames maps each single feature bit to the human-readable name that
// Flags.Strings emits for it. A bit without an entry here has no name.
var flagNames = map[Flags]string{
	CMOV:        "CMOV",        // i686 CMOV
	NX:          "NX",          // NX (No-Execute) bit
	AMD3DNOW:    "AMD3DNOW",    // AMD 3DNOW
	AMD3DNOWEXT: "AMD3DNOWEXT", // AMD 3DNowExt
	MMX:         "MMX",         // Standard MMX
	MMXEXT:      "MMXEXT",      // SSE integer functions or AMD MMX ext
	SSE:         "SSE",         // SSE functions
	SSE2:        "SSE2",        // P4 SSE2 functions
	SSE3:        "SSE3",        // Prescott SSE3 functions
	SSSE3:       "SSSE3",       // Conroe SSSE3 functions
	SSE4:        "SSE4.1",      // Penryn SSE4.1 functions
	SSE4A:       "SSE4A",       // AMD Barcelona microarchitecture SSE4a instructions
	SSE42:       "SSE4.2",      // Nehalem SSE4.2 functions
	AVX:         "AVX",         // AVX functions
	AVX2:        "AVX2",        // AVX2 functions
	FMA3:        "FMA3",        // Intel FMA 3
	FMA4:        "FMA4",        // Bulldozer FMA4 functions
	XOP:         "XOP",         // Bulldozer XOP functions
	F16C:        "F16C",        // Half-precision floating-point conversion
	BMI1:        "BMI1",        // Bit Manipulation Instruction Set 1
	BMI2:        "BMI2",        // Bit Manipulation Instruction Set 2
	TBM:         "TBM",         // AMD Trailing Bit Manipulation
	LZCNT:       "LZCNT",       // LZCNT instruction
	POPCNT:      "POPCNT",      // POPCNT instruction
	AESNI:       "AESNI",       // Advanced Encryption Standard New Instructions
	CLMUL:       "CLMUL",       // Carry-less Multiplication
	HTT:         "HTT",         // Hyperthreading (enabled)
	HLE:         "HLE",         // Hardware Lock Elision
	RTM:         "RTM",         // Restricted Transactional Memory
	RDRAND:      "RDRAND",      // RDRAND instruction is available
	RDSEED:      "RDSEED",      // RDSEED instruction is available
	ADX:         "ADX",         // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	SHA:         "SHA",         // Intel SHA Extensions
	AVX512F:     "AVX512F",     // AVX-512 Foundation
	AVX512DQ:    "AVX512DQ",    // AVX-512 Doubleword and Quadword Instructions
	AVX512IFMA:  "AVX512IFMA",  // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF:    "AVX512PF",    // AVX-512 Prefetch Instructions
	AVX512ER:    "AVX512ER",    // AVX-512 Exponential and Reciprocal Instructions
	AVX512CD:    "AVX512CD",    // AVX-512 Conflict Detection Instructions
	AVX512BW:    "AVX512BW",    // AVX-512 Byte and Word Instructions
	AVX512VL:    "AVX512VL",    // AVX-512 Vector Length Extensions
	AVX512VBMI:  "AVX512VBMI",  // AVX-512 Vector Bit Manipulation Instructions
	MPX:         "MPX",         // Intel MPX (Memory Protection Extensions)
	ERMS:        "ERMS",        // Enhanced REP MOVSB/STOSB
	RDTSCP:      "RDTSCP",      // RDTSCP Instruction
	CX16:        "CX16",        // CMPXCHG16B Instruction
	SGX:         "SGX",         // Software Guard Extensions

	// Performance indicators
	SSE2SLOW: "SSE2SLOW", // SSE2 supported, but usually not faster
	SSE3SLOW: "SSE3SLOW", // SSE3 supported, but usually not faster
	ATOM:     "ATOM",     // Atom processor, some SSSE3 instructions are slower

}
   141  
// CPUInfo contains information about the detected system CPU.
// The exported fields are populated by Detect; the unexported fields cache
// the highest supported CPUID function numbers.
type CPUInfo struct {
	BrandName      string // Brand name reported by the CPU
	VendorID       Vendor // Comparable CPU vendor ID
	Features       Flags  // Features of the CPU
	PhysicalCores  int    // Number of physical processor cores in your CPU. Will be 0 if undetectable.
	ThreadsPerCore int    // Number of threads per physical core. Will be 1 if undetectable.
	LogicalCores   int    // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
	Family         int    // CPU family number
	Model          int    // CPU model number
	CacheLine      int    // Cache line size in bytes. Will be 0 if undetectable.
	Cache          struct {
		L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
		L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
		L2  int // L2 Cache (per core or shared). Will be -1 if undetected
		L3  int // L3 Cache (per core or shared). Will be -1 if undetected
	}
	SGX       SGXSupport // SGX details; only queried when the SGX feature flag is set
	maxFunc   uint32     // Highest standard CPUID function, as returned by maxFunctionID
	maxExFunc uint32     // Highest extended CPUID function, as returned by maxExtendedFunction
}
   163  
// Low-level CPU query hooks. They are declared as function variables and are
// nil here; presumably initCPU (called from init, defined outside this file)
// assigns the platform-specific implementations — confirm against that file.

// cpuid executes CPUID with the given function number in op.
var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)

// cpuidex executes CPUID with function number op and sub-function op2.
var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)

// xgetbv reads the extended control register selected by index.
var xgetbv func(index uint32) (eax, edx uint32)

// rdtscpAsm executes RDTSCP; callers in this file read the counter from
// eax/edx and the auxiliary value from ecx.
var rdtscpAsm func() (eax, ebx, ecx, edx uint32)
   168  
// CPU contains information about the CPU as detected on startup,
// or when Detect last was called.
//
// Use this as the primary entry point to your data: its fields are filled in
// by Detect, so most queries are plain field reads rather than new CPUID calls.
var CPU CPUInfo
   175  
// init prepares the low-level CPU access (initCPU, defined outside this file)
// and then runs the first detection so that CPU is populated before any
// user code can read it. The order matters: Detect uses the hooks that
// initCPU is expected to install.
func init() {
	initCPU()
	Detect()
}
   180  
   181  // Detect will re-detect current CPU info.
   182  // This will replace the content of the exported CPU variable.
   183  //
   184  // Unless you expect the CPU to change while you are running your program
   185  // you should not need to call this function.
   186  // If you call this, you must ensure that no other goroutine is accessing the
   187  // exported CPU variable.
   188  func Detect() {
   189  	CPU.maxFunc = maxFunctionID()
   190  	CPU.maxExFunc = maxExtendedFunction()
   191  	CPU.BrandName = brandName()
   192  	CPU.CacheLine = cacheLine()
   193  	CPU.Family, CPU.Model = familyModel()
   194  	CPU.Features = support()
   195  	CPU.SGX = sgx(CPU.Features&SGX != 0)
   196  	CPU.ThreadsPerCore = threadsPerCore()
   197  	CPU.LogicalCores = logicalCores()
   198  	CPU.PhysicalCores = physicalCores()
   199  	CPU.VendorID = vendorID()
   200  	CPU.cacheSize()
   201  }
   202  
   203  // Generated here: http://play.yougam/libraries/p/BxFH2Gdc0G
   204  
   205  // Cmov indicates support of CMOV instructions
   206  func (c CPUInfo) Cmov() bool {
   207  	return c.Features&CMOV != 0
   208  }
   209  
   210  // Amd3dnow indicates support of AMD 3DNOW! instructions
   211  func (c CPUInfo) Amd3dnow() bool {
   212  	return c.Features&AMD3DNOW != 0
   213  }
   214  
   215  // Amd3dnowExt indicates support of AMD 3DNOW! Extended instructions
   216  func (c CPUInfo) Amd3dnowExt() bool {
   217  	return c.Features&AMD3DNOWEXT != 0
   218  }
   219  
   220  // MMX indicates support of MMX instructions
   221  func (c CPUInfo) MMX() bool {
   222  	return c.Features&MMX != 0
   223  }
   224  
   225  // MMXExt indicates support of MMXEXT instructions
   226  // (SSE integer functions or AMD MMX ext)
   227  func (c CPUInfo) MMXExt() bool {
   228  	return c.Features&MMXEXT != 0
   229  }
   230  
   231  // SSE indicates support of SSE instructions
   232  func (c CPUInfo) SSE() bool {
   233  	return c.Features&SSE != 0
   234  }
   235  
   236  // SSE2 indicates support of SSE 2 instructions
   237  func (c CPUInfo) SSE2() bool {
   238  	return c.Features&SSE2 != 0
   239  }
   240  
   241  // SSE3 indicates support of SSE 3 instructions
   242  func (c CPUInfo) SSE3() bool {
   243  	return c.Features&SSE3 != 0
   244  }
   245  
   246  // SSSE3 indicates support of SSSE 3 instructions
   247  func (c CPUInfo) SSSE3() bool {
   248  	return c.Features&SSSE3 != 0
   249  }
   250  
   251  // SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions
   252  func (c CPUInfo) SSE4() bool {
   253  	return c.Features&SSE4 != 0
   254  }
   255  
   256  // SSE42 indicates support of SSE4.2 instructions
   257  func (c CPUInfo) SSE42() bool {
   258  	return c.Features&SSE42 != 0
   259  }
   260  
   261  // AVX indicates support of AVX instructions
   262  // and operating system support of AVX instructions
   263  func (c CPUInfo) AVX() bool {
   264  	return c.Features&AVX != 0
   265  }
   266  
   267  // AVX2 indicates support of AVX2 instructions
   268  func (c CPUInfo) AVX2() bool {
   269  	return c.Features&AVX2 != 0
   270  }
   271  
   272  // FMA3 indicates support of FMA3 instructions
   273  func (c CPUInfo) FMA3() bool {
   274  	return c.Features&FMA3 != 0
   275  }
   276  
   277  // FMA4 indicates support of FMA4 instructions
   278  func (c CPUInfo) FMA4() bool {
   279  	return c.Features&FMA4 != 0
   280  }
   281  
   282  // XOP indicates support of XOP instructions
   283  func (c CPUInfo) XOP() bool {
   284  	return c.Features&XOP != 0
   285  }
   286  
   287  // F16C indicates support of F16C instructions
   288  func (c CPUInfo) F16C() bool {
   289  	return c.Features&F16C != 0
   290  }
   291  
   292  // BMI1 indicates support of BMI1 instructions
   293  func (c CPUInfo) BMI1() bool {
   294  	return c.Features&BMI1 != 0
   295  }
   296  
   297  // BMI2 indicates support of BMI2 instructions
   298  func (c CPUInfo) BMI2() bool {
   299  	return c.Features&BMI2 != 0
   300  }
   301  
   302  // TBM indicates support of TBM instructions
   303  // (AMD Trailing Bit Manipulation)
   304  func (c CPUInfo) TBM() bool {
   305  	return c.Features&TBM != 0
   306  }
   307  
   308  // Lzcnt indicates support of LZCNT instruction
   309  func (c CPUInfo) Lzcnt() bool {
   310  	return c.Features&LZCNT != 0
   311  }
   312  
   313  // Popcnt indicates support of POPCNT instruction
   314  func (c CPUInfo) Popcnt() bool {
   315  	return c.Features&POPCNT != 0
   316  }
   317  
   318  // HTT indicates the processor has Hyperthreading enabled
   319  func (c CPUInfo) HTT() bool {
   320  	return c.Features&HTT != 0
   321  }
   322  
   323  // SSE2Slow indicates that SSE2 may be slow on this processor
   324  func (c CPUInfo) SSE2Slow() bool {
   325  	return c.Features&SSE2SLOW != 0
   326  }
   327  
   328  // SSE3Slow indicates that SSE3 may be slow on this processor
   329  func (c CPUInfo) SSE3Slow() bool {
   330  	return c.Features&SSE3SLOW != 0
   331  }
   332  
   333  // AesNi indicates support of AES-NI instructions
   334  // (Advanced Encryption Standard New Instructions)
   335  func (c CPUInfo) AesNi() bool {
   336  	return c.Features&AESNI != 0
   337  }
   338  
   339  // Clmul indicates support of CLMUL instructions
   340  // (Carry-less Multiplication)
   341  func (c CPUInfo) Clmul() bool {
   342  	return c.Features&CLMUL != 0
   343  }
   344  
   345  // NX indicates support of NX (No-Execute) bit
   346  func (c CPUInfo) NX() bool {
   347  	return c.Features&NX != 0
   348  }
   349  
   350  // SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions
   351  func (c CPUInfo) SSE4A() bool {
   352  	return c.Features&SSE4A != 0
   353  }
   354  
   355  // HLE indicates support of Hardware Lock Elision
   356  func (c CPUInfo) HLE() bool {
   357  	return c.Features&HLE != 0
   358  }
   359  
   360  // RTM indicates support of Restricted Transactional Memory
   361  func (c CPUInfo) RTM() bool {
   362  	return c.Features&RTM != 0
   363  }
   364  
   365  // Rdrand indicates support of RDRAND instruction is available
   366  func (c CPUInfo) Rdrand() bool {
   367  	return c.Features&RDRAND != 0
   368  }
   369  
   370  // Rdseed indicates support of RDSEED instruction is available
   371  func (c CPUInfo) Rdseed() bool {
   372  	return c.Features&RDSEED != 0
   373  }
   374  
   375  // ADX indicates support of Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
   376  func (c CPUInfo) ADX() bool {
   377  	return c.Features&ADX != 0
   378  }
   379  
   380  // SHA indicates support of Intel SHA Extensions
   381  func (c CPUInfo) SHA() bool {
   382  	return c.Features&SHA != 0
   383  }
   384  
   385  // AVX512F indicates support of AVX-512 Foundation
   386  func (c CPUInfo) AVX512F() bool {
   387  	return c.Features&AVX512F != 0
   388  }
   389  
   390  // AVX512DQ indicates support of AVX-512 Doubleword and Quadword Instructions
   391  func (c CPUInfo) AVX512DQ() bool {
   392  	return c.Features&AVX512DQ != 0
   393  }
   394  
   395  // AVX512IFMA indicates support of AVX-512 Integer Fused Multiply-Add Instructions
   396  func (c CPUInfo) AVX512IFMA() bool {
   397  	return c.Features&AVX512IFMA != 0
   398  }
   399  
   400  // AVX512PF indicates support of AVX-512 Prefetch Instructions
   401  func (c CPUInfo) AVX512PF() bool {
   402  	return c.Features&AVX512PF != 0
   403  }
   404  
   405  // AVX512ER indicates support of AVX-512 Exponential and Reciprocal Instructions
   406  func (c CPUInfo) AVX512ER() bool {
   407  	return c.Features&AVX512ER != 0
   408  }
   409  
   410  // AVX512CD indicates support of AVX-512 Conflict Detection Instructions
   411  func (c CPUInfo) AVX512CD() bool {
   412  	return c.Features&AVX512CD != 0
   413  }
   414  
   415  // AVX512BW indicates support of AVX-512 Byte and Word Instructions
   416  func (c CPUInfo) AVX512BW() bool {
   417  	return c.Features&AVX512BW != 0
   418  }
   419  
   420  // AVX512VL indicates support of AVX-512 Vector Length Extensions
   421  func (c CPUInfo) AVX512VL() bool {
   422  	return c.Features&AVX512VL != 0
   423  }
   424  
   425  // AVX512VBMI indicates support of AVX-512 Vector Bit Manipulation Instructions
   426  func (c CPUInfo) AVX512VBMI() bool {
   427  	return c.Features&AVX512VBMI != 0
   428  }
   429  
   430  // MPX indicates support of Intel MPX (Memory Protection Extensions)
   431  func (c CPUInfo) MPX() bool {
   432  	return c.Features&MPX != 0
   433  }
   434  
   435  // ERMS indicates support of Enhanced REP MOVSB/STOSB
   436  func (c CPUInfo) ERMS() bool {
   437  	return c.Features&ERMS != 0
   438  }
   439  
   440  func (c CPUInfo) RDTSCP() bool {
   441  	return c.Features&RDTSCP != 0
   442  }
   443  
   444  func (c CPUInfo) CX16() bool {
   445  	return c.Features&CX16 != 0
   446  }
   447  
   448  // Atom indicates an Atom processor
   449  func (c CPUInfo) Atom() bool {
   450  	return c.Features&ATOM != 0
   451  }
   452  
   453  // Intel returns true if vendor is recognized as Intel
   454  func (c CPUInfo) Intel() bool {
   455  	return c.VendorID == Intel
   456  }
   457  
   458  // AMD returns true if vendor is recognized as AMD
   459  func (c CPUInfo) AMD() bool {
   460  	return c.VendorID == AMD
   461  }
   462  
   463  // Transmeta returns true if vendor is recognized as Transmeta
   464  func (c CPUInfo) Transmeta() bool {
   465  	return c.VendorID == Transmeta
   466  }
   467  
   468  // NSC returns true if vendor is recognized as National Semiconductor
   469  func (c CPUInfo) NSC() bool {
   470  	return c.VendorID == NSC
   471  }
   472  
   473  // VIA returns true if vendor is recognized as VIA
   474  func (c CPUInfo) VIA() bool {
   475  	return c.VendorID == VIA
   476  }
   477  
   478  // RTCounter returns the 64-bit time-stamp counter
   479  // Uses the RDTSCP instruction. The value 0 is returned
   480  // if the CPU does not support the instruction.
   481  func (c CPUInfo) RTCounter() uint64 {
   482  	if !c.RDTSCP() {
   483  		return 0
   484  	}
   485  	a, _, _, d := rdtscpAsm()
   486  	return uint64(a) | (uint64(d) << 32)
   487  }
   488  
   489  // Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
   490  // This variable is OS dependent, but on Linux contains information
   491  // about the current cpu/core the code is running on.
   492  // If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
   493  func (c CPUInfo) Ia32TscAux() uint32 {
   494  	if !c.RDTSCP() {
   495  		return 0
   496  	}
   497  	_, _, ecx, _ := rdtscpAsm()
   498  	return ecx
   499  }
   500  
   501  // LogicalCPU will return the Logical CPU the code is currently executing on.
   502  // This is likely to change when the OS re-schedules the running thread
   503  // to another CPU.
   504  // If the current core cannot be detected, -1 will be returned.
   505  func (c CPUInfo) LogicalCPU() int {
   506  	if c.maxFunc < 1 {
   507  		return -1
   508  	}
   509  	_, ebx, _, _ := cpuid(1)
   510  	return int(ebx >> 24)
   511  }
   512  
   513  // VM Will return true if the cpu id indicates we are in
   514  // a virtual machine. This is only a hint, and will very likely
   515  // have many false negatives.
   516  func (c CPUInfo) VM() bool {
   517  	switch c.VendorID {
   518  	case MSVM, KVM, VMware, XenHVM:
   519  		return true
   520  	}
   521  	return false
   522  }
   523  
// Flags contains detected CPU features and characteristics.
// It is a bit set: each feature constant (CMOV, NX, ...) occupies one bit.
type Flags uint64
   526  
   527  // String returns a string representation of the detected
   528  // CPU features.
   529  func (f Flags) String() string {
   530  	return strings.Join(f.Strings(), ",")
   531  }
   532  
   533  // Strings returns and array of the detected features.
   534  func (f Flags) Strings() []string {
   535  	s := support()
   536  	r := make([]string, 0, 20)
   537  	for i := uint(0); i < 64; i++ {
   538  		key := Flags(1 << i)
   539  		val := flagNames[key]
   540  		if s&key != 0 {
   541  			r = append(r, val)
   542  		}
   543  	}
   544  	return r
   545  }
   546  
   547  func maxExtendedFunction() uint32 {
   548  	eax, _, _, _ := cpuid(0x80000000)
   549  	return eax
   550  }
   551  
   552  func maxFunctionID() uint32 {
   553  	a, _, _, _ := cpuid(0)
   554  	return a
   555  }
   556  
   557  func brandName() string {
   558  	if maxExtendedFunction() >= 0x80000004 {
   559  		v := make([]uint32, 0, 48)
   560  		for i := uint32(0); i < 3; i++ {
   561  			a, b, c, d := cpuid(0x80000002 + i)
   562  			v = append(v, a, b, c, d)
   563  		}
   564  		return strings.Trim(string(valAsString(v...)), " ")
   565  	}
   566  	return "unknown"
   567  }
   568  
   569  func threadsPerCore() int {
   570  	mfi := maxFunctionID()
   571  	if mfi < 0x4 || vendorID() != Intel {
   572  		return 1
   573  	}
   574  
   575  	if mfi < 0xb {
   576  		_, b, _, d := cpuid(1)
   577  		if (d & (1 << 28)) != 0 {
   578  			// v will contain logical core count
   579  			v := (b >> 16) & 255
   580  			if v > 1 {
   581  				a4, _, _, _ := cpuid(4)
   582  				// physical cores
   583  				v2 := (a4 >> 26) + 1
   584  				if v2 > 0 {
   585  					return int(v) / int(v2)
   586  				}
   587  			}
   588  		}
   589  		return 1
   590  	}
   591  	_, b, _, _ := cpuidex(0xb, 0)
   592  	if b&0xffff == 0 {
   593  		return 1
   594  	}
   595  	return int(b & 0xffff)
   596  }
   597  
   598  func logicalCores() int {
   599  	mfi := maxFunctionID()
   600  	switch vendorID() {
   601  	case Intel:
   602  		// Use this on old Intel processors
   603  		if mfi < 0xb {
   604  			if mfi < 1 {
   605  				return 0
   606  			}
   607  			// CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
   608  			// that can be assigned to logical processors in a physical package.
   609  			// The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
   610  			_, ebx, _, _ := cpuid(1)
   611  			logical := (ebx >> 16) & 0xff
   612  			return int(logical)
   613  		}
   614  		_, b, _, _ := cpuidex(0xb, 1)
   615  		return int(b & 0xffff)
   616  	case AMD:
   617  		_, b, _, _ := cpuid(1)
   618  		return int((b >> 16) & 0xff)
   619  	default:
   620  		return 0
   621  	}
   622  }
   623  
   624  func familyModel() (int, int) {
   625  	if maxFunctionID() < 0x1 {
   626  		return 0, 0
   627  	}
   628  	eax, _, _, _ := cpuid(1)
   629  	family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff)
   630  	model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0)
   631  	return int(family), int(model)
   632  }
   633  
   634  func physicalCores() int {
   635  	switch vendorID() {
   636  	case Intel:
   637  		return logicalCores() / threadsPerCore()
   638  	case AMD:
   639  		if maxExtendedFunction() >= 0x80000008 {
   640  			_, _, c, _ := cpuid(0x80000008)
   641  			return int(c&0xff) + 1
   642  		}
   643  	}
   644  	return 0
   645  }
   646  
// vendorMapping translates the 12-character vendor string returned by CPUID
// function 0 into a Vendor value. Strings not listed here are reported as
// Other by vendorID.
//
// Excerpt from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
var vendorMapping = map[string]Vendor{
	"AMDisbetter!": AMD,
	"AuthenticAMD": AMD,
	"CentaurHauls": VIA,
	"GenuineIntel": Intel,
	"TransmetaCPU": Transmeta,
	"GenuineTMx86": Transmeta,
	"Geode by NSC": NSC,
	"VIA VIA VIA ": VIA,
	"KVMKVMKVMKVM": KVM,
	"Microsoft Hv": MSVM,
	"VMwareVMware": VMware,
	"XenVMMXenVMM": XenHVM,
}
   662  
   663  func vendorID() Vendor {
   664  	_, b, c, d := cpuid(0)
   665  	v := valAsString(b, d, c)
   666  	vend, ok := vendorMapping[string(v)]
   667  	if !ok {
   668  		return Other
   669  	}
   670  	return vend
   671  }
   672  
   673  func cacheLine() int {
   674  	if maxFunctionID() < 0x1 {
   675  		return 0
   676  	}
   677  
   678  	_, ebx, _, _ := cpuid(1)
   679  	cache := (ebx & 0xff00) >> 5 // cflush size
   680  	if cache == 0 && maxExtendedFunction() >= 0x80000006 {
   681  		_, _, ecx, _ := cpuid(0x80000006)
   682  		cache = ecx & 0xff // cacheline size
   683  	}
   684  	// TODO: Read from Cache and TLB Information
   685  	return int(cache)
   686  }
   687  
   688  func (c *CPUInfo) cacheSize() {
   689  	c.Cache.L1D = -1
   690  	c.Cache.L1I = -1
   691  	c.Cache.L2 = -1
   692  	c.Cache.L3 = -1
   693  	vendor := vendorID()
   694  	switch vendor {
   695  	case Intel:
   696  		if maxFunctionID() < 4 {
   697  			return
   698  		}
   699  		for i := uint32(0); ; i++ {
   700  			eax, ebx, ecx, _ := cpuidex(4, i)
   701  			cacheType := eax & 15
   702  			if cacheType == 0 {
   703  				break
   704  			}
   705  			cacheLevel := (eax >> 5) & 7
   706  			coherency := int(ebx&0xfff) + 1
   707  			partitions := int((ebx>>12)&0x3ff) + 1
   708  			associativity := int((ebx>>22)&0x3ff) + 1
   709  			sets := int(ecx) + 1
   710  			size := associativity * partitions * coherency * sets
   711  			switch cacheLevel {
   712  			case 1:
   713  				if cacheType == 1 {
   714  					// 1 = Data Cache
   715  					c.Cache.L1D = size
   716  				} else if cacheType == 2 {
   717  					// 2 = Instruction Cache
   718  					c.Cache.L1I = size
   719  				} else {
   720  					if c.Cache.L1D < 0 {
   721  						c.Cache.L1I = size
   722  					}
   723  					if c.Cache.L1I < 0 {
   724  						c.Cache.L1I = size
   725  					}
   726  				}
   727  			case 2:
   728  				c.Cache.L2 = size
   729  			case 3:
   730  				c.Cache.L3 = size
   731  			}
   732  		}
   733  	case AMD:
   734  		// Untested.
   735  		if maxExtendedFunction() < 0x80000005 {
   736  			return
   737  		}
   738  		_, _, ecx, edx := cpuid(0x80000005)
   739  		c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024)
   740  		c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024)
   741  
   742  		if maxExtendedFunction() < 0x80000006 {
   743  			return
   744  		}
   745  		_, _, ecx, _ = cpuid(0x80000006)
   746  		c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
   747  	}
   748  
   749  	return
   750  }
   751  
// SGXSupport describes the Software Guard Extensions capabilities of the CPU
// as filled in by sgx. When Available is false, all other fields are left at
// their zero values.
type SGXSupport struct {
	Available           bool  // The SGX feature flag is set
	SGX1Supported       bool  // Bit 0 of EAX from CPUID function 0x12, sub-function 0
	SGX2Supported       bool  // Bit 1 of EAX from CPUID function 0x12, sub-function 0
	MaxEnclaveSizeNot64 int64 // 2 to the power of EDX[7:0]; presumably the max enclave size outside 64-bit mode — confirm against the Intel SDM
	MaxEnclaveSize64    int64 // 2 to the power of EDX[15:8]; presumably the max enclave size in 64-bit mode — confirm against the Intel SDM
}
   759  
   760  func sgx(available bool) (rval SGXSupport) {
   761  	rval.Available = available
   762  
   763  	if !available {
   764  		return
   765  	}
   766  
   767  	a, _, _, d := cpuidex(0x12, 0)
   768  	rval.SGX1Supported = a&0x01 != 0
   769  	rval.SGX2Supported = a&0x02 != 0
   770  	rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF)     // pow 2
   771  	rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2
   772  
   773  	return
   774  }
   775  
// support queries the CPU and returns the set of detected feature flags.
//
// It returns 0 when CPUID function 1 is not available. Otherwise it tests
// bits from four sources, in this order:
//   - CPUID function 1 (ECX/EDX): the basic features, plus AVX/FMA3, which
//     additionally require the OS to have enabled the state via XGETBV;
//   - CPUID function 7, sub-function 0 (EBX/ECX): AVX2, BMI, SGX, ... and the
//     AVX-512 family, the latter again gated on XGETBV;
//   - extended function 0x80000001 (ECX/EDX): AMD-style features;
//   - family/model based performance indicators (SSE2SLOW, SSE3SLOW, ATOM).
//
// Note that the performance indicators are only evaluated when extended
// function 0x80000001 is available, because that code lives inside the
// corresponding block.
func support() Flags {
	mfi := maxFunctionID()
	vend := vendorID()
	if mfi < 0x1 {
		return 0
	}
	rval := uint64(0)
	// Function 1: c is ECX, d is EDX.
	_, _, c, d := cpuid(1)
	if (d & (1 << 15)) != 0 {
		rval |= CMOV
	}
	if (d & (1 << 23)) != 0 {
		rval |= MMX
	}
	// Bit 25 is tested twice: it sets both MMXEXT and SSE.
	if (d & (1 << 25)) != 0 {
		rval |= MMXEXT
	}
	if (d & (1 << 25)) != 0 {
		rval |= SSE
	}
	if (d & (1 << 26)) != 0 {
		rval |= SSE2
	}
	if (c & 1) != 0 {
		rval |= SSE3
	}
	if (c & 0x00000200) != 0 {
		rval |= SSSE3
	}
	if (c & 0x00080000) != 0 {
		rval |= SSE4
	}
	if (c & 0x00100000) != 0 {
		rval |= SSE42
	}
	if (c & (1 << 25)) != 0 {
		rval |= AESNI
	}
	if (c & (1 << 1)) != 0 {
		rval |= CLMUL
	}
	if c&(1<<23) != 0 {
		rval |= POPCNT
	}
	if c&(1<<30) != 0 {
		rval |= RDRAND
	}
	if c&(1<<29) != 0 {
		rval |= F16C
	}
	if c&(1<<13) != 0 {
		rval |= CX16
	}
	// HTT is only reported on Intel, and only when more than one thread per
	// core is actually detected.
	if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 {
		if threadsPerCore() > 1 {
			rval |= HTT
		}
	}

	// Check XGETBV, OXSAVE and AVX bits
	if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 {
		// Check for OS support
		eax, _ := xgetbv(0)
		if (eax & 0x6) == 0x6 {
			rval |= AVX
			// FMA3 is only reported together with AVX.
			if (c & 0x00001000) != 0 {
				rval |= FMA3
			}
		}
	}

	// Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
	if mfi >= 7 {
		// Function 7, sub-function 0. This ebx/ecx pair is local to the block;
		// c still refers to ECX of function 1.
		_, ebx, ecx, _ := cpuidex(7, 0)
		if (rval&AVX) != 0 && (ebx&0x00000020) != 0 {
			rval |= AVX2
		}
		// BMI2 is only reported when BMI1 is present as well.
		if (ebx & 0x00000008) != 0 {
			rval |= BMI1
			if (ebx & 0x00000100) != 0 {
				rval |= BMI2
			}
		}
		if ebx&(1<<2) != 0 {
			rval |= SGX
		}
		if ebx&(1<<4) != 0 {
			rval |= HLE
		}
		if ebx&(1<<9) != 0 {
			rval |= ERMS
		}
		if ebx&(1<<11) != 0 {
			rval |= RTM
		}
		if ebx&(1<<14) != 0 {
			rval |= MPX
		}
		if ebx&(1<<18) != 0 {
			rval |= RDSEED
		}
		if ebx&(1<<19) != 0 {
			rval |= ADX
		}
		if ebx&(1<<29) != 0 {
			rval |= SHA
		}

		// Only detect AVX-512 features if XGETBV is supported
		if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
			// Check for OS support
			eax, _ := xgetbv(0)

			// Verify that XCR0[7:5] = '111b' (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
			// ZMM16-ZMM31 state are enabled by OS)
			// and that XCR0[2:1] = '11b' (XMM state and YMM state are enabled by OS).
			if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 {
				if ebx&(1<<16) != 0 {
					rval |= AVX512F
				}
				if ebx&(1<<17) != 0 {
					rval |= AVX512DQ
				}
				if ebx&(1<<21) != 0 {
					rval |= AVX512IFMA
				}
				if ebx&(1<<26) != 0 {
					rval |= AVX512PF
				}
				if ebx&(1<<27) != 0 {
					rval |= AVX512ER
				}
				if ebx&(1<<28) != 0 {
					rval |= AVX512CD
				}
				if ebx&(1<<30) != 0 {
					rval |= AVX512BW
				}
				if ebx&(1<<31) != 0 {
					rval |= AVX512VL
				}
				// ecx
				if ecx&(1<<1) != 0 {
					rval |= AVX512VBMI
				}
			}
		}
	}

	if maxExtendedFunction() >= 0x80000001 {
		// Extended function 0x80000001. c and d are shadowed here and now
		// refer to the ECX/EDX of the extended function.
		_, _, c, d := cpuid(0x80000001)
		// The LZCNT bit also turns on POPCNT.
		if (c & (1 << 5)) != 0 {
			rval |= LZCNT
			rval |= POPCNT
		}
		if (d & (1 << 31)) != 0 {
			rval |= AMD3DNOW
		}
		if (d & (1 << 30)) != 0 {
			rval |= AMD3DNOWEXT
		}
		if (d & (1 << 23)) != 0 {
			rval |= MMX
		}
		if (d & (1 << 22)) != 0 {
			rval |= MMXEXT
		}
		if (c & (1 << 6)) != 0 {
			rval |= SSE4A
		}
		if d&(1<<20) != 0 {
			rval |= NX
		}
		if d&(1<<27) != 0 {
			rval |= RDTSCP
		}

		/* Allow for selectively disabling SSE2 functions on AMD processors
		   with SSE2 support but not SSE4a. This includes Athlon64, some
		   Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
		   than SSE2 often enough to utilize this special-case flag.
		   SSE2 and SSE2SLOW are both set in this case so that SSE2 is used
		   unless explicitly disabled by checking SSE2SLOW.
		   Note: the condition is "vendor is not Intel", so it also applies
		   to every other non-Intel vendor. */
		if vendorID() != Intel &&
			rval&SSE2 != 0 && (c&0x00000040) == 0 {
			rval |= SSE2SLOW
		}

		/* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
		 * used unless the OS has AVX support. */
		if (rval & AVX) != 0 {
			if (c & 0x00000800) != 0 {
				rval |= XOP
			}
			if (c & 0x00010000) != 0 {
				rval |= FMA4
			}
		}

		if vendorID() == Intel {
			family, model := familyModel()
			if family == 6 && (model == 9 || model == 13 || model == 14) {
				/* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
				 * 6/14 (core1 "yonah") theoretically support sse2, but it's
				 * usually slower than mmx. */
				if (rval & SSE2) != 0 {
					rval |= SSE2SLOW
				}
				if (rval & SSE3) != 0 {
					rval |= SSE3SLOW
				}
			}
			/* The Atom processor has SSSE3 support, which is useful in many cases,
			 * but sometimes the SSSE3 version is slower than the SSE2 equivalent
			 * on the Atom, but is generally faster on other processors supporting
			 * SSSE3. This flag allows for selectively disabling certain SSSE3
			 * functions on the Atom. */
			if family == 6 && model == 28 {
				rval |= ATOM
			}
		}
	}
	return Flags(rval)
}
  1001  
  1002  func valAsString(values ...uint32) []byte {
  1003  	r := make([]byte, 4*len(values))
  1004  	for i, v := range values {
  1005  		dst := r[i*4:]
  1006  		dst[0] = byte(v & 0xff)
  1007  		dst[1] = byte((v >> 8) & 0xff)
  1008  		dst[2] = byte((v >> 16) & 0xff)
  1009  		dst[3] = byte((v >> 24) & 0xff)
  1010  		switch {
  1011  		case dst[0] == 0:
  1012  			return r[:i*4]
  1013  		case dst[1] == 0:
  1014  			return r[:i*4+1]
  1015  		case dst[2] == 0:
  1016  			return r[:i*4+2]
  1017  		case dst[3] == 0:
  1018  			return r[:i*4+3]
  1019  		}
  1020  	}
  1021  	return r
  1022  }