github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/libraries/klauspost/cpuid/private/cpuid.go (about)

     1  // Generated, DO NOT EDIT,
     2  // but copy it to your own project and rename the package.
     3  // See more at http://yougam/libraries/klauspost/cpuid
     4  
     5  package cpuid
     6  
     7  import (
     8  	"strings"
     9  )
    10  
// vendor is a representation of a CPU vendor.
type vendor int
    13  
// Known CPU vendors. other is the zero value and is what vendorID returns
// when the vendor string is not present in vendorMapping.
const (
	other	vendor	= iota
	intel
	amd
	via
	transmeta
	nsc	// National Semiconductor
	kvm	// Kernel-based Virtual Machine
	msvm	// Microsoft Hyper-V or Windows Virtual PC
	vmware
	xenhvm
)
    26  
// CPU feature flags. Every constant is a distinct single bit (1 << iota),
// so the values can be OR-ed together into one flags value. The order of
// the declarations determines the bit positions.
const (
	cmov		= 1 << iota	// i686 CMOV
	nx				// NX (No-Execute) bit
	amd3dnow			// AMD 3DNOW
	amd3dnowext			// AMD 3DNowExt
	mmx				// standard MMX
	mmxext				// SSE integer functions or AMD MMX ext
	sse				// SSE functions
	sse2				// P4 SSE2 functions
	sse3				// Prescott SSE3 functions
	ssse3				// Conroe SSSE3 functions
	sse4				// Penryn SSE4.1 functions
	sse4a				// AMD Barcelona microarchitecture SSE4a instructions
	sse42				// Nehalem SSE4.2 functions
	avx				// AVX functions
	avx2				// AVX2 functions
	fma3				// Intel FMA 3
	fma4				// Bulldozer FMA4 functions
	xop				// Bulldozer XOP functions
	f16c				// Half-precision floating-point conversion
	bmi1				// Bit Manipulation Instruction Set 1
	bmi2				// Bit Manipulation Instruction Set 2
	tbm				// AMD Trailing Bit Manipulation
	lzcnt				// LZCNT instruction
	popcnt				// POPCNT instruction
	aesni				// Advanced Encryption Standard New Instructions
	clmul				// Carry-less Multiplication
	htt				// Hyperthreading (enabled)
	hle				// Hardware Lock Elision
	rtm				// Restricted Transactional Memory
	rdrand				// RDRAND instruction is available
	rdseed				// RDSEED instruction is available
	adx				// Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	sha				// Intel SHA Extensions
	avx512f				// AVX-512 Foundation
	avx512dq			// AVX-512 Doubleword and Quadword Instructions
	avx512ifma			// AVX-512 Integer Fused Multiply-Add Instructions
	avx512pf			// AVX-512 Prefetch Instructions
	avx512er			// AVX-512 Exponential and Reciprocal Instructions
	avx512cd			// AVX-512 Conflict Detection Instructions
	avx512bw			// AVX-512 Byte and Word Instructions
	avx512vl			// AVX-512 Vector Length Extensions
	avx512vbmi			// AVX-512 Vector Bit Manipulation Instructions
	mpx				// Intel MPX (Memory Protection Extensions)
	erms				// Enhanced REP MOVSB/STOSB
	rdtscp				// RDTSCP Instruction
	cx16				// CMPXCHG16B Instruction

	// Performance indicators
	sse2slow	// SSE2 is supported, but usually not faster
	sse3slow	// SSE3 is supported, but usually not faster
	atom		// Atom processor, some SSSE3 instructions are slower
)
    80  
// flagNames maps each single-bit feature flag to the name used for it in
// the string representation of a flags value.
var flagNames = map[flags]string{
	cmov:		"CMOV",		// i686 CMOV
	nx:		"NX",		// NX (No-Execute) bit
	amd3dnow:	"AMD3DNOW",	// AMD 3DNOW
	amd3dnowext:	"AMD3DNOWEXT",	// AMD 3DNowExt
	mmx:		"MMX",		// Standard MMX
	mmxext:		"MMXEXT",	// SSE integer functions or AMD MMX ext
	sse:		"SSE",		// SSE functions
	sse2:		"SSE2",		// P4 SSE2 functions
	sse3:		"SSE3",		// Prescott SSE3 functions
	ssse3:		"SSSE3",	// Conroe SSSE3 functions
	sse4:		"SSE4.1",	// Penryn SSE4.1 functions
	sse4a:		"SSE4A",	// AMD Barcelona microarchitecture SSE4a instructions
	sse42:		"SSE4.2",	// Nehalem SSE4.2 functions
	avx:		"AVX",		// AVX functions
	avx2:		"AVX2",		// AVX2 functions
	fma3:		"FMA3",		// Intel FMA 3
	fma4:		"FMA4",		// Bulldozer FMA4 functions
	xop:		"XOP",		// Bulldozer XOP functions
	f16c:		"F16C",		// Half-precision floating-point conversion
	bmi1:		"BMI1",		// Bit Manipulation Instruction Set 1
	bmi2:		"BMI2",		// Bit Manipulation Instruction Set 2
	tbm:		"TBM",		// AMD Trailing Bit Manipulation
	lzcnt:		"LZCNT",	// LZCNT instruction
	popcnt:		"POPCNT",	// POPCNT instruction
	aesni:		"AESNI",	// Advanced Encryption Standard New Instructions
	clmul:		"CLMUL",	// Carry-less Multiplication
	htt:		"HTT",		// Hyperthreading (enabled)
	hle:		"HLE",		// Hardware Lock Elision
	rtm:		"RTM",		// Restricted Transactional Memory
	rdrand:		"RDRAND",	// RDRAND instruction is available
	rdseed:		"RDSEED",	// RDSEED instruction is available
	adx:		"ADX",		// Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	sha:		"SHA",		// Intel SHA Extensions
	avx512f:	"AVX512F",	// AVX-512 Foundation
	avx512dq:	"AVX512DQ",	// AVX-512 Doubleword and Quadword Instructions
	avx512ifma:	"AVX512IFMA",	// AVX-512 Integer Fused Multiply-Add Instructions
	avx512pf:	"AVX512PF",	// AVX-512 Prefetch Instructions
	avx512er:	"AVX512ER",	// AVX-512 Exponential and Reciprocal Instructions
	avx512cd:	"AVX512CD",	// AVX-512 Conflict Detection Instructions
	avx512bw:	"AVX512BW",	// AVX-512 Byte and Word Instructions
	avx512vl:	"AVX512VL",	// AVX-512 Vector Length Extensions
	avx512vbmi:	"AVX512VBMI",	// AVX-512 Vector Bit Manipulation Instructions
	mpx:		"MPX",		// Intel MPX (Memory Protection Extensions)
	erms:		"ERMS",		// Enhanced REP MOVSB/STOSB
	rdtscp:		"RDTSCP",	// RDTSCP Instruction
	cx16:		"CX16",		// CMPXCHG16B Instruction

	// Performance indicators
	sse2slow:	"SSE2SLOW",	// SSE2 supported, but usually not faster
	sse3slow:	"SSE3SLOW",	// SSE3 supported, but usually not faster
	atom:		"ATOM",		// Atom processor, some SSSE3 instructions are slower

}
   135  
// cpuInfo contains information about the detected system CPU.
// The fields are filled in by detect.
type cpuInfo struct {
	brandname	string	// Brand name reported by the CPU
	vendorid	vendor	// Comparable CPU vendor ID
	features	flags	// Features of the CPU
	physicalcores	int	// Number of physical processor cores in your CPU. Will be 0 if undetectable.
	threadspercore	int	// Number of threads per physical core. Will be 1 if undetectable.
	logicalcores	int	// Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
	family		int	// CPU family number
	model		int	// CPU model number
	cacheline	int	// Cache line size in bytes. Will be 0 if undetectable.
	cache		struct {
		l1i	int	// L1 Instruction Cache (per core or shared). Will be -1 if undetected
		l1d	int	// L1 Data Cache (per core or shared). Will be -1 if undetected
		l2	int	// L2 Cache (per core or shared). Will be -1 if undetected
		l3	int	// L3 Cache (per core or shared). Will be -1 if undetected
	}
	maxFunc		uint32	// Value returned by maxFunctionID (EAX of CPUID leaf 0)
	maxExFunc	uint32	// Value returned by maxExtendedFunction (EAX of CPUID leaf 0x80000000)
}
   156  
// Low-level CPU query hooks. They are declared as function variables and
// are not assigned in this part of the file.
// NOTE(review): presumably initCPU installs the platform implementations
// before detect runs — confirm against the platform-specific files.
var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
var xgetbv func(index uint32) (eax, edx uint32)
var rdtscpAsm func() (eax, ebx, ecx, edx uint32)
   161  
// cpu contains information about the CPU as detected on startup,
// or when detect was last called.
//
// Use this as the primary entry point to the data; its fields are filled
// in once by detect, so reading them does not query the CPU again.
var cpu cpuInfo
   168  
// init calls initCPU and then detect, which fills in the package-level cpu
// variable.
// NOTE(review): initCPU is defined outside this view; presumably it sets up
// the cpuid/cpuidex/xgetbv/rdtscpAsm hooks that detect relies on — confirm.
func init() {
	initCPU()
	detect()
}
   173  
// detect will re-detect current CPU info.
// This will overwrite the fields of the package-level cpu variable.
//
// Unless you expect the CPU to change while you are running your program
// you should not need to call this function.
// If you call this, you must ensure that no other goroutine is accessing the
// cpu variable.
func detect() {
	cpu.maxFunc = maxFunctionID()
	cpu.maxExFunc = maxExtendedFunction()
	cpu.brandname = brandName()
	cpu.cacheline = cacheLine()
	cpu.family, cpu.model = familyModel()
	cpu.features = support()
	cpu.threadspercore = threadsPerCore()
	cpu.logicalcores = logicalCores()
	cpu.physicalcores = physicalCores()
	cpu.vendorid = vendorID()
	// cacheSize writes cpu.cache.{l1d,l1i,l2,l3} in place.
	cpu.cacheSize()
}
   194  
   195  // Generated here: http://play.yougam/libraries/p/BxFH2Gdc0G
   196  
// cmov reports whether the cmov flag (CMOV instructions) is set in c.features.
func (c cpuInfo) cmov() bool {
	return c.features&cmov != 0
}
   201  
// amd3dnow reports whether the amd3dnow flag (AMD 3DNOW! instructions) is
// set in c.features.
func (c cpuInfo) amd3dnow() bool {
	return c.features&amd3dnow != 0
}
   206  
// amd3dnowext reports whether the amd3dnowext flag (AMD 3DNOW! Extended
// instructions) is set in c.features.
func (c cpuInfo) amd3dnowext() bool {
	return c.features&amd3dnowext != 0
}
   211  
// mmx reports whether the mmx flag (MMX instructions) is set in c.features.
func (c cpuInfo) mmx() bool {
	return c.features&mmx != 0
}
   216  
// mmxext reports whether the mmxext flag (MMXEXT instructions: SSE integer
// functions or AMD MMX ext) is set in c.features.
func (c cpuInfo) mmxext() bool {
	return c.features&mmxext != 0
}
   222  
// sse reports whether the sse flag (SSE instructions) is set in c.features.
func (c cpuInfo) sse() bool {
	return c.features&sse != 0
}
   227  
// sse2 reports whether the sse2 flag (SSE2 instructions) is set in c.features.
func (c cpuInfo) sse2() bool {
	return c.features&sse2 != 0
}
   232  
// sse3 reports whether the sse3 flag (SSE3 instructions) is set in c.features.
func (c cpuInfo) sse3() bool {
	return c.features&sse3 != 0
}
   237  
// ssse3 reports whether the ssse3 flag (SSSE3 instructions) is set in
// c.features.
func (c cpuInfo) ssse3() bool {
	return c.features&ssse3 != 0
}
   242  
// sse4 reports whether the sse4 flag (SSE4, also called SSE4.1,
// instructions) is set in c.features.
func (c cpuInfo) sse4() bool {
	return c.features&sse4 != 0
}
   247  
// sse42 reports whether the sse42 flag (SSE4.2 instructions) is set in
// c.features.
func (c cpuInfo) sse42() bool {
	return c.features&sse42 != 0
}
   252  
// avx reports whether the avx flag is set in c.features, which indicates
// support of AVX instructions and operating system support of AVX
// instructions.
func (c cpuInfo) avx() bool {
	return c.features&avx != 0
}
   258  
// avx2 reports whether the avx2 flag (AVX2 instructions) is set in c.features.
func (c cpuInfo) avx2() bool {
	return c.features&avx2 != 0
}
   263  
// fma3 reports whether the fma3 flag (FMA3 instructions) is set in c.features.
func (c cpuInfo) fma3() bool {
	return c.features&fma3 != 0
}
   268  
// fma4 reports whether the fma4 flag (FMA4 instructions) is set in c.features.
func (c cpuInfo) fma4() bool {
	return c.features&fma4 != 0
}
   273  
// xop reports whether the xop flag (XOP instructions) is set in c.features.
func (c cpuInfo) xop() bool {
	return c.features&xop != 0
}
   278  
// f16c reports whether the f16c flag (F16C half-precision conversion
// instructions) is set in c.features.
func (c cpuInfo) f16c() bool {
	return c.features&f16c != 0
}
   283  
// bmi1 reports whether the bmi1 flag (BMI1 instructions) is set in c.features.
func (c cpuInfo) bmi1() bool {
	return c.features&bmi1 != 0
}
   288  
// bmi2 reports whether the bmi2 flag (BMI2 instructions) is set in c.features.
func (c cpuInfo) bmi2() bool {
	return c.features&bmi2 != 0
}
   293  
// tbm reports whether the tbm flag (TBM instructions, AMD Trailing Bit
// Manipulation) is set in c.features.
func (c cpuInfo) tbm() bool {
	return c.features&tbm != 0
}
   299  
// lzcnt reports whether the lzcnt flag (LZCNT instruction) is set in
// c.features.
func (c cpuInfo) lzcnt() bool {
	return c.features&lzcnt != 0
}
   304  
// popcnt reports whether the popcnt flag (POPCNT instruction) is set in
// c.features.
func (c cpuInfo) popcnt() bool {
	return c.features&popcnt != 0
}
   309  
// htt reports whether the htt flag is set in c.features, which indicates
// that the processor has Hyperthreading enabled.
func (c cpuInfo) htt() bool {
	return c.features&htt != 0
}
   314  
// sse2slow reports whether the sse2slow flag is set in c.features, which
// indicates that SSE2 may be slow on this processor.
func (c cpuInfo) sse2slow() bool {
	return c.features&sse2slow != 0
}
   319  
// sse3slow reports whether the sse3slow flag is set in c.features, which
// indicates that SSE3 may be slow on this processor.
func (c cpuInfo) sse3slow() bool {
	return c.features&sse3slow != 0
}
   324  
// aesni reports whether the aesni flag (AES-NI, Advanced Encryption
// Standard New Instructions) is set in c.features.
func (c cpuInfo) aesni() bool {
	return c.features&aesni != 0
}
   330  
// clmul reports whether the clmul flag (CLMUL instructions, Carry-less
// Multiplication) is set in c.features.
func (c cpuInfo) clmul() bool {
	return c.features&clmul != 0
}
   336  
// nx reports whether the nx flag (NX, No-Execute bit) is set in c.features.
func (c cpuInfo) nx() bool {
	return c.features&nx != 0
}
   341  
// sse4a reports whether the sse4a flag (AMD Barcelona microarchitecture
// SSE4a instructions) is set in c.features.
func (c cpuInfo) sse4a() bool {
	return c.features&sse4a != 0
}
   346  
// hle reports whether the hle flag (Hardware Lock Elision) is set in
// c.features.
func (c cpuInfo) hle() bool {
	return c.features&hle != 0
}
   351  
// rtm reports whether the rtm flag (Restricted Transactional Memory) is set
// in c.features.
func (c cpuInfo) rtm() bool {
	return c.features&rtm != 0
}
   356  
// rdrand reports whether the rdrand flag (RDRAND instruction available) is
// set in c.features.
func (c cpuInfo) rdrand() bool {
	return c.features&rdrand != 0
}
   361  
// rdseed reports whether the rdseed flag (RDSEED instruction available) is
// set in c.features.
func (c cpuInfo) rdseed() bool {
	return c.features&rdseed != 0
}
   366  
// adx reports whether the adx flag (Intel ADX, Multi-Precision Add-Carry
// Instruction Extensions) is set in c.features.
func (c cpuInfo) adx() bool {
	return c.features&adx != 0
}
   371  
// sha reports whether the sha flag (Intel SHA Extensions) is set in
// c.features.
func (c cpuInfo) sha() bool {
	return c.features&sha != 0
}
   376  
// avx512f reports whether the avx512f flag (AVX-512 Foundation) is set in
// c.features.
func (c cpuInfo) avx512f() bool {
	return c.features&avx512f != 0
}
   381  
// avx512dq reports whether the avx512dq flag (AVX-512 Doubleword and
// Quadword Instructions) is set in c.features.
func (c cpuInfo) avx512dq() bool {
	return c.features&avx512dq != 0
}
   386  
// avx512ifma reports whether the avx512ifma flag (AVX-512 Integer Fused
// Multiply-Add Instructions) is set in c.features.
func (c cpuInfo) avx512ifma() bool {
	return c.features&avx512ifma != 0
}
   391  
// avx512pf reports whether the avx512pf flag (AVX-512 Prefetch
// Instructions) is set in c.features.
func (c cpuInfo) avx512pf() bool {
	return c.features&avx512pf != 0
}
   396  
// avx512er reports whether the avx512er flag (AVX-512 Exponential and
// Reciprocal Instructions) is set in c.features.
func (c cpuInfo) avx512er() bool {
	return c.features&avx512er != 0
}
   401  
// avx512cd reports whether the avx512cd flag (AVX-512 Conflict Detection
// Instructions) is set in c.features.
func (c cpuInfo) avx512cd() bool {
	return c.features&avx512cd != 0
}
   406  
// avx512bw reports whether the avx512bw flag (AVX-512 Byte and Word
// Instructions) is set in c.features.
func (c cpuInfo) avx512bw() bool {
	return c.features&avx512bw != 0
}
   411  
// avx512vl reports whether the avx512vl flag (AVX-512 Vector Length
// Extensions) is set in c.features.
func (c cpuInfo) avx512vl() bool {
	return c.features&avx512vl != 0
}
   416  
// avx512vbmi reports whether the avx512vbmi flag (AVX-512 Vector Bit
// Manipulation Instructions) is set in c.features.
func (c cpuInfo) avx512vbmi() bool {
	return c.features&avx512vbmi != 0
}
   421  
// mpx reports whether the mpx flag (Intel MPX, Memory Protection
// Extensions) is set in c.features.
func (c cpuInfo) mpx() bool {
	return c.features&mpx != 0
}
   426  
// erms reports whether the erms flag (Enhanced REP MOVSB/STOSB) is set in
// c.features.
func (c cpuInfo) erms() bool {
	return c.features&erms != 0
}
   431  
// rdtscp reports whether the rdtscp flag (RDTSCP instruction) is set in
// c.features.
func (c cpuInfo) rdtscp() bool {
	return c.features&rdtscp != 0
}
   435  
// cx16 reports whether the cx16 flag (CMPXCHG16B instruction) is set in
// c.features.
func (c cpuInfo) cx16() bool {
	return c.features&cx16 != 0
}
   439  
// atom reports whether the atom flag is set in c.features, which indicates
// an Atom processor.
func (c cpuInfo) atom() bool {
	return c.features&atom != 0
}
   444  
// intel returns true if the vendor is recognized as Intel.
func (c cpuInfo) intel() bool {
	return c.vendorid == intel
}
   449  
// amd returns true if the vendor is recognized as AMD.
func (c cpuInfo) amd() bool {
	return c.vendorid == amd
}
   454  
// transmeta returns true if the vendor is recognized as Transmeta.
func (c cpuInfo) transmeta() bool {
	return c.vendorid == transmeta
}
   459  
// nsc returns true if the vendor is recognized as National Semiconductor.
func (c cpuInfo) nsc() bool {
	return c.vendorid == nsc
}
   464  
// via returns true if the vendor is recognized as VIA.
func (c cpuInfo) via() bool {
	return c.vendorid == via
}
   469  
   470  // RTCounter returns the 64-bit time-stamp counter
   471  // Uses the RDTSCP instruction. The value 0 is returned
   472  // if the CPU does not support the instruction.
   473  func (c cpuInfo) rtcounter() uint64 {
   474  	if !c.rdtscp() {
   475  		return 0
   476  	}
   477  	a, _, _, d := rdtscpAsm()
   478  	return uint64(a) | (uint64(d) << 32)
   479  }
   480  
   481  // Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
   482  // This variable is OS dependent, but on Linux contains information
   483  // about the current cpu/core the code is running on.
   484  // If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
   485  func (c cpuInfo) ia32tscaux() uint32 {
   486  	if !c.rdtscp() {
   487  		return 0
   488  	}
   489  	_, _, ecx, _ := rdtscpAsm()
   490  	return ecx
   491  }
   492  
   493  // LogicalCPU will return the Logical CPU the code is currently executing on.
   494  // This is likely to change when the OS re-schedules the running thread
   495  // to another CPU.
   496  // If the current core cannot be detected, -1 will be returned.
   497  func (c cpuInfo) logicalcpu() int {
   498  	if c.maxFunc < 1 {
   499  		return -1
   500  	}
   501  	_, ebx, _, _ := cpuid(1)
   502  	return int(ebx >> 24)
   503  }
   504  
   505  // VM Will return true if the cpu id indicates we are in
   506  // a virtual machine. This is only a hint, and will very likely
   507  // have many false negatives.
   508  func (c cpuInfo) vm() bool {
   509  	switch c.vendorid {
   510  	case msvm, kvm, vmware, xenhvm:
   511  		return true
   512  	}
   513  	return false
   514  }
   515  
// flags contains detected CPU features and characteristics as a bit set.
type flags uint64
   518  
   519  // String returns a string representation of the detected
   520  // CPU features.
   521  func (f flags) String() string {
   522  	return strings.Join(f.strings(), ",")
   523  }
   524  
   525  // Strings returns and array of the detected features.
   526  func (f flags) strings() []string {
   527  	s := support()
   528  	r := make([]string, 0, 20)
   529  	for i := uint(0); i < 64; i++ {
   530  		key := flags(1 << i)
   531  		val := flagNames[key]
   532  		if s&key != 0 {
   533  			r = append(r, val)
   534  		}
   535  	}
   536  	return r
   537  }
   538  
   539  func maxExtendedFunction() uint32 {
   540  	eax, _, _, _ := cpuid(0x80000000)
   541  	return eax
   542  }
   543  
   544  func maxFunctionID() uint32 {
   545  	a, _, _, _ := cpuid(0)
   546  	return a
   547  }
   548  
   549  func brandName() string {
   550  	if maxExtendedFunction() >= 0x80000004 {
   551  		v := make([]uint32, 0, 48)
   552  		for i := uint32(0); i < 3; i++ {
   553  			a, b, c, d := cpuid(0x80000002 + i)
   554  			v = append(v, a, b, c, d)
   555  		}
   556  		return strings.Trim(string(valAsString(v...)), " ")
   557  	}
   558  	return "unknown"
   559  }
   560  
   561  func threadsPerCore() int {
   562  	mfi := maxFunctionID()
   563  	if mfi < 0x4 || vendorID() != intel {
   564  		return 1
   565  	}
   566  
   567  	if mfi < 0xb {
   568  		_, b, _, d := cpuid(1)
   569  		if (d & (1 << 28)) != 0 {
   570  			// v will contain logical core count
   571  			v := (b >> 16) & 255
   572  			if v > 1 {
   573  				a4, _, _, _ := cpuid(4)
   574  				// physical cores
   575  				v2 := (a4 >> 26) + 1
   576  				if v2 > 0 {
   577  					return int(v) / int(v2)
   578  				}
   579  			}
   580  		}
   581  		return 1
   582  	}
   583  	_, b, _, _ := cpuidex(0xb, 0)
   584  	if b&0xffff == 0 {
   585  		return 1
   586  	}
   587  	return int(b & 0xffff)
   588  }
   589  
   590  func logicalCores() int {
   591  	mfi := maxFunctionID()
   592  	switch vendorID() {
   593  	case intel:
   594  		// Use this on old Intel processors
   595  		if mfi < 0xb {
   596  			if mfi < 1 {
   597  				return 0
   598  			}
   599  			// CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
   600  			// that can be assigned to logical processors in a physical package.
   601  			// The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
   602  			_, ebx, _, _ := cpuid(1)
   603  			logical := (ebx >> 16) & 0xff
   604  			return int(logical)
   605  		}
   606  		_, b, _, _ := cpuidex(0xb, 1)
   607  		return int(b & 0xffff)
   608  	case amd:
   609  		_, b, _, _ := cpuid(1)
   610  		return int((b >> 16) & 0xff)
   611  	default:
   612  		return 0
   613  	}
   614  }
   615  
   616  func familyModel() (int, int) {
   617  	if maxFunctionID() < 0x1 {
   618  		return 0, 0
   619  	}
   620  	eax, _, _, _ := cpuid(1)
   621  	family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff)
   622  	model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0)
   623  	return int(family), int(model)
   624  }
   625  
   626  func physicalCores() int {
   627  	switch vendorID() {
   628  	case intel:
   629  		return logicalCores() / threadsPerCore()
   630  	case amd:
   631  		if maxExtendedFunction() >= 0x80000008 {
   632  			_, _, c, _ := cpuid(0x80000008)
   633  			return int(c&0xff) + 1
   634  		}
   635  	}
   636  	return 0
   637  }
   638  
// vendorMapping maps the 12-character CPUID vendor string to a vendor value.
// Excerpt from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
var vendorMapping = map[string]vendor{
	"AMDisbetter!":	amd,
	"AuthenticAMD":	amd,
	"CentaurHauls":	via,
	"GenuineIntel":	intel,
	"TransmetaCPU":	transmeta,
	"GenuineTMx86":	transmeta,
	"Geode by NSC":	nsc,
	"VIA VIA VIA ":	via,
	"KVMKVMKVMKVM":	kvm,
	"Microsoft Hv":	msvm,
	"VMwareVMware":	vmware,
	"XenVMMXenVMM":	xenhvm,
}
   654  
   655  func vendorID() vendor {
   656  	_, b, c, d := cpuid(0)
   657  	v := valAsString(b, d, c)
   658  	vend, ok := vendorMapping[string(v)]
   659  	if !ok {
   660  		return other
   661  	}
   662  	return vend
   663  }
   664  
   665  func cacheLine() int {
   666  	if maxFunctionID() < 0x1 {
   667  		return 0
   668  	}
   669  
   670  	_, ebx, _, _ := cpuid(1)
   671  	cache := (ebx & 0xff00) >> 5	// cflush size
   672  	if cache == 0 && maxExtendedFunction() >= 0x80000006 {
   673  		_, _, ecx, _ := cpuid(0x80000006)
   674  		cache = ecx & 0xff	// cacheline size
   675  	}
   676  	// TODO: Read from Cache and TLB Information
   677  	return int(cache)
   678  }
   679  
   680  func (c *cpuInfo) cacheSize() {
   681  	c.cache.l1d = -1
   682  	c.cache.l1i = -1
   683  	c.cache.l2 = -1
   684  	c.cache.l3 = -1
   685  	vendor := vendorID()
   686  	switch vendor {
   687  	case intel:
   688  		if maxFunctionID() < 4 {
   689  			return
   690  		}
   691  		for i := uint32(0); ; i++ {
   692  			eax, ebx, ecx, _ := cpuidex(4, i)
   693  			cacheType := eax & 15
   694  			if cacheType == 0 {
   695  				break
   696  			}
   697  			cacheLevel := (eax >> 5) & 7
   698  			coherency := int(ebx&0xfff) + 1
   699  			partitions := int((ebx>>12)&0x3ff) + 1
   700  			associativity := int((ebx>>22)&0x3ff) + 1
   701  			sets := int(ecx) + 1
   702  			size := associativity * partitions * coherency * sets
   703  			switch cacheLevel {
   704  			case 1:
   705  				if cacheType == 1 {
   706  					// 1 = Data Cache
   707  					c.cache.l1d = size
   708  				} else if cacheType == 2 {
   709  					// 2 = Instruction Cache
   710  					c.cache.l1i = size
   711  				} else {
   712  					if c.cache.l1d < 0 {
   713  						c.cache.l1i = size
   714  					}
   715  					if c.cache.l1i < 0 {
   716  						c.cache.l1i = size
   717  					}
   718  				}
   719  			case 2:
   720  				c.cache.l2 = size
   721  			case 3:
   722  				c.cache.l3 = size
   723  			}
   724  		}
   725  	case amd:
   726  		// Untested.
   727  		if maxExtendedFunction() < 0x80000005 {
   728  			return
   729  		}
   730  		_, _, ecx, edx := cpuid(0x80000005)
   731  		c.cache.l1d = int(((ecx >> 24) & 0xFF) * 1024)
   732  		c.cache.l1i = int(((edx >> 24) & 0xFF) * 1024)
   733  
   734  		if maxExtendedFunction() < 0x80000006 {
   735  			return
   736  		}
   737  		_, _, ecx, _ = cpuid(0x80000006)
   738  		c.cache.l2 = int(((ecx >> 16) & 0xFFFF) * 1024)
   739  	}
   740  
   741  	return
   742  }
   743  
// support queries CPUID (and XGETBV where required) and returns the set of
// detected feature flags. It returns 0 when CPUID leaf 1 is not available.
//
// The flags are accumulated in rval in this order: leaf 1 features, AVX/FMA3
// (only with OS support), leaf 7 features including AVX-512, and finally
// the extended leaf 0x80000001 features together with the "slow"/atom
// performance indicators.
func support() flags {
	mfi := maxFunctionID()
	vend := vendorID()
	if mfi < 0x1 {
		return 0
	}
	rval := uint64(0)
	// Leaf 1: c holds ECX and d holds EDX.
	_, _, c, d := cpuid(1)
	if (d & (1 << 15)) != 0 {
		rval |= cmov
	}
	if (d & (1 << 23)) != 0 {
		rval |= mmx
	}
	// EDX bit 25 sets both mmxext and sse.
	if (d & (1 << 25)) != 0 {
		rval |= mmxext
	}
	if (d & (1 << 25)) != 0 {
		rval |= sse
	}
	if (d & (1 << 26)) != 0 {
		rval |= sse2
	}
	if (c & 1) != 0 {
		rval |= sse3
	}
	if (c & 0x00000200) != 0 {
		rval |= ssse3
	}
	if (c & 0x00080000) != 0 {
		rval |= sse4
	}
	if (c & 0x00100000) != 0 {
		rval |= sse42
	}
	if (c & (1 << 25)) != 0 {
		rval |= aesni
	}
	if (c & (1 << 1)) != 0 {
		rval |= clmul
	}
	if c&(1<<23) != 0 {
		rval |= popcnt
	}
	if c&(1<<30) != 0 {
		rval |= rdrand
	}
	if c&(1<<29) != 0 {
		rval |= f16c
	}
	if c&(1<<13) != 0 {
		rval |= cx16
	}
	// htt is only reported on Intel, and only when more than one thread
	// per core is actually detected.
	if vend == intel && (d&(1<<28)) != 0 && mfi >= 4 {
		if threadsPerCore() > 1 {
			rval |= htt
		}
	}

	// Check XGETBV, OXSAVE and AVX bits
	if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 {
		// Check for OS support
		eax, _ := xgetbv(0)
		if (eax & 0x6) == 0x6 {
			rval |= avx
			if (c & 0x00001000) != 0 {
				rval |= fma3
			}
		}
	}

	// Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
	if mfi >= 7 {
		_, ebx, ecx, _ := cpuidex(7, 0)
		if (rval&avx) != 0 && (ebx&0x00000020) != 0 {
			rval |= avx2
		}
		// bmi2 is only reported together with bmi1.
		if (ebx & 0x00000008) != 0 {
			rval |= bmi1
			if (ebx & 0x00000100) != 0 {
				rval |= bmi2
			}
		}
		if ebx&(1<<4) != 0 {
			rval |= hle
		}
		if ebx&(1<<9) != 0 {
			rval |= erms
		}
		if ebx&(1<<11) != 0 {
			rval |= rtm
		}
		if ebx&(1<<14) != 0 {
			rval |= mpx
		}
		if ebx&(1<<18) != 0 {
			rval |= rdseed
		}
		if ebx&(1<<19) != 0 {
			rval |= adx
		}
		if ebx&(1<<29) != 0 {
			rval |= sha
		}

		// Only detect AVX-512 features if XGETBV is supported
		// (c is still ECX of leaf 1 here).
		if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
			// Check for OS support
			eax, _ := xgetbv(0)

			// Verify that XCR0[7:5] = '111b' (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
			// ZMM16-ZMM31 state are enabled by OS)
			// and that XCR0[2:1] = '11b' (XMM state and YMM state are enabled by OS).
			if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 {
				if ebx&(1<<16) != 0 {
					rval |= avx512f
				}
				if ebx&(1<<17) != 0 {
					rval |= avx512dq
				}
				if ebx&(1<<21) != 0 {
					rval |= avx512ifma
				}
				if ebx&(1<<26) != 0 {
					rval |= avx512pf
				}
				if ebx&(1<<27) != 0 {
					rval |= avx512er
				}
				if ebx&(1<<28) != 0 {
					rval |= avx512cd
				}
				if ebx&(1<<30) != 0 {
					rval |= avx512bw
				}
				if ebx&(1<<31) != 0 {
					rval |= avx512vl
				}
				// ecx
				if ecx&(1<<1) != 0 {
					rval |= avx512vbmi
				}
			}
		}
	}

	if maxExtendedFunction() >= 0x80000001 {
		// Extended leaf 0x80000001: c and d shadow the leaf 1 registers
		// inside this block.
		_, _, c, d := cpuid(0x80000001)
		// ECX bit 5 sets both lzcnt and popcnt.
		if (c & (1 << 5)) != 0 {
			rval |= lzcnt
			rval |= popcnt
		}
		if (d & (1 << 31)) != 0 {
			rval |= amd3dnow
		}
		if (d & (1 << 30)) != 0 {
			rval |= amd3dnowext
		}
		if (d & (1 << 23)) != 0 {
			rval |= mmx
		}
		if (d & (1 << 22)) != 0 {
			rval |= mmxext
		}
		if (c & (1 << 6)) != 0 {
			rval |= sse4a
		}
		if d&(1<<20) != 0 {
			rval |= nx
		}
		if d&(1<<27) != 0 {
			rval |= rdtscp
		}

		/* Allow for selectively disabling SSE2 functions on AMD processors
		   with SSE2 support but not SSE4a. This includes Athlon64, some
		   Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
		   than SSE2 often enough to utilize this special-case flag.
		   sse2 and sse2slow are both set in this case so that SSE2 is used
		   unless explicitly disabled by checking sse2slow. */
		if vendorID() != intel &&
			rval&sse2 != 0 && (c&0x00000040) == 0 {
			rval |= sse2slow
		}

		/* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
		 * used unless the OS has AVX support. */
		if (rval & avx) != 0 {
			if (c & 0x00000800) != 0 {
				rval |= xop
			}
			if (c & 0x00010000) != 0 {
				rval |= fma4
			}
		}

		if vendorID() == intel {
			family, model := familyModel()
			if family == 6 && (model == 9 || model == 13 || model == 14) {
				/* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
				 * 6/14 (core1 "yonah") theoretically support sse2, but it's
				 * usually slower than mmx. */
				if (rval & sse2) != 0 {
					rval |= sse2slow
				}
				if (rval & sse3) != 0 {
					rval |= sse3slow
				}
			}
			/* The Atom processor has SSSE3 support, which is useful in many cases,
			 * but sometimes the SSSE3 version is slower than the SSE2 equivalent
			 * on the Atom, but is generally faster on other processors supporting
			 * SSSE3. This flag allows for selectively disabling certain SSSE3
			 * functions on the Atom. */
			if family == 6 && model == 28 {
				rval |= atom
			}
		}
	}
	return flags(rval)
}
   966  
   967  func valAsString(values ...uint32) []byte {
   968  	r := make([]byte, 4*len(values))
   969  	for i, v := range values {
   970  		dst := r[i*4:]
   971  		dst[0] = byte(v & 0xff)
   972  		dst[1] = byte((v >> 8) & 0xff)
   973  		dst[2] = byte((v >> 16) & 0xff)
   974  		dst[3] = byte((v >> 24) & 0xff)
   975  		switch {
   976  		case dst[0] == 0:
   977  			return r[:i*4]
   978  		case dst[1] == 0:
   979  			return r[:i*4+1]
   980  		case dst[2] == 0:
   981  			return r[:i*4+2]
   982  		case dst[3] == 0:
   983  			return r[:i*4+3]
   984  		}
   985  	}
   986  	return r
   987  }