github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/cpuid/cpuid_x86.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // +build 386 amd64
    16  
    17  package cpuid
    18  
    19  import (
    20  	"bytes"
    21  	"fmt"
    22  	"io/ioutil"
    23  	"strconv"
    24  	"strings"
    25  
    26  	"github.com/SagerNet/gvisor/pkg/log"
    27  )
    28  
    29  // Common references for CPUID leaves and bits:
    30  //
    31  // Intel:
    32  //   * Intel SDM Volume 2, Chapter 3.2 "CPUID" (more up-to-date)
    33  //   * Intel Application Note 485 (more detailed)
    34  //
    35  // AMD:
    36  //   * AMD64 APM Volume 3, Appendix 3 "Obtaining Processor Information ..."
    37  
// block identifies one group of 32 Feature bits. featureID combines a block
// with a bit offset to produce the Feature value for that bit.
type block int

// blockSize is the number of feature bits held by each block.
const blockSize = 32
    42  
    43  // Feature bits are numbered according to "blocks". Each block is 32 bits, and
    44  // feature bits from the same source (cpuid leaf/level) are in the same block.
    45  func featureID(b block, bit int) Feature {
    46  	return Feature(32*int(b) + bit)
    47  }
    48  
    49  // Block 0 constants are all of the "basic" feature bits returned by a cpuid in
    50  // ecx with eax=1.
    51  const (
    52  	X86FeatureSSE3 Feature = iota
    53  	X86FeaturePCLMULDQ
    54  	X86FeatureDTES64
    55  	X86FeatureMONITOR
    56  	X86FeatureDSCPL
    57  	X86FeatureVMX
    58  	X86FeatureSMX
    59  	X86FeatureEST
    60  	X86FeatureTM2
    61  	X86FeatureSSSE3 // Not a typo, "supplemental" SSE3.
    62  	X86FeatureCNXTID
    63  	X86FeatureSDBG
    64  	X86FeatureFMA
    65  	X86FeatureCX16
    66  	X86FeatureXTPR
    67  	X86FeaturePDCM
    68  	_ // ecx bit 16 is reserved.
    69  	X86FeaturePCID
    70  	X86FeatureDCA
    71  	X86FeatureSSE4_1
    72  	X86FeatureSSE4_2
    73  	X86FeatureX2APIC
    74  	X86FeatureMOVBE
    75  	X86FeaturePOPCNT
    76  	X86FeatureTSCD
    77  	X86FeatureAES
    78  	X86FeatureXSAVE
    79  	X86FeatureOSXSAVE
    80  	X86FeatureAVX
    81  	X86FeatureF16C
    82  	X86FeatureRDRAND
    83  	_ // ecx bit 31 is reserved.
    84  )
    85  
    86  // Block 1 constants are all of the "basic" feature bits returned by a cpuid in
    87  // edx with eax=1.
    88  const (
    89  	X86FeatureFPU Feature = 32 + iota
    90  	X86FeatureVME
    91  	X86FeatureDE
    92  	X86FeaturePSE
    93  	X86FeatureTSC
    94  	X86FeatureMSR
    95  	X86FeaturePAE
    96  	X86FeatureMCE
    97  	X86FeatureCX8
    98  	X86FeatureAPIC
    99  	_ // edx bit 10 is reserved.
   100  	X86FeatureSEP
   101  	X86FeatureMTRR
   102  	X86FeaturePGE
   103  	X86FeatureMCA
   104  	X86FeatureCMOV
   105  	X86FeaturePAT
   106  	X86FeaturePSE36
   107  	X86FeaturePSN
   108  	X86FeatureCLFSH
   109  	_ // edx bit 20 is reserved.
   110  	X86FeatureDS
   111  	X86FeatureACPI
   112  	X86FeatureMMX
   113  	X86FeatureFXSR
   114  	X86FeatureSSE
   115  	X86FeatureSSE2
   116  	X86FeatureSS
   117  	X86FeatureHTT
   118  	X86FeatureTM
   119  	X86FeatureIA64
   120  	X86FeaturePBE
   121  )
   122  
   123  // Block 2 bits are the "structured extended" features returned in ebx for
   124  // eax=7, ecx=0.
   125  const (
   126  	X86FeatureFSGSBase Feature = 2*32 + iota
   127  	X86FeatureTSC_ADJUST
   128  	_ // ebx bit 2 is reserved.
   129  	X86FeatureBMI1
   130  	X86FeatureHLE
   131  	X86FeatureAVX2
   132  	X86FeatureFDP_EXCPTN_ONLY
   133  	X86FeatureSMEP
   134  	X86FeatureBMI2
   135  	X86FeatureERMS
   136  	X86FeatureINVPCID
   137  	X86FeatureRTM
   138  	X86FeatureCQM
   139  	X86FeatureFPCSDS
   140  	X86FeatureMPX
   141  	X86FeatureRDT
   142  	X86FeatureAVX512F
   143  	X86FeatureAVX512DQ
   144  	X86FeatureRDSEED
   145  	X86FeatureADX
   146  	X86FeatureSMAP
   147  	X86FeatureAVX512IFMA
   148  	X86FeaturePCOMMIT
   149  	X86FeatureCLFLUSHOPT
   150  	X86FeatureCLWB
   151  	X86FeatureIPT // Intel processor trace.
   152  	X86FeatureAVX512PF
   153  	X86FeatureAVX512ER
   154  	X86FeatureAVX512CD
   155  	X86FeatureSHA
   156  	X86FeatureAVX512BW
   157  	X86FeatureAVX512VL
   158  )
   159  
   160  // Block 3 bits are the "extended" features returned in ecx for eax=7, ecx=0.
   161  const (
   162  	X86FeaturePREFETCHWT1 Feature = 3*32 + iota
   163  	X86FeatureAVX512VBMI
   164  	X86FeatureUMIP
   165  	X86FeaturePKU
   166  	X86FeatureOSPKE
   167  	X86FeatureWAITPKG
   168  	X86FeatureAVX512_VBMI2
   169  	_ // ecx bit 7 is reserved
   170  	X86FeatureGFNI
   171  	X86FeatureVAES
   172  	X86FeatureVPCLMULQDQ
   173  	X86FeatureAVX512_VNNI
   174  	X86FeatureAVX512_BITALG
   175  	X86FeatureTME
   176  	X86FeatureAVX512_VPOPCNTDQ
   177  	_ // ecx bit 15 is reserved
   178  	X86FeatureLA57
   179  	// ecx bits 17-21 are reserved
   180  	_
   181  	_
   182  	_
   183  	_
   184  	_
   185  	X86FeatureRDPID
   186  	// ecx bits 23-24 are reserved
   187  	_
   188  	_
   189  	X86FeatureCLDEMOTE
   190  	_ // ecx bit 26 is reserved
   191  	X86FeatureMOVDIRI
   192  	X86FeatureMOVDIR64B
   193  )
   194  
   195  // Block 4 constants are for xsave capabilities in CPUID.(EAX=0DH,ECX=01H):EAX.
   196  // The CPUID leaf is available only if 'X86FeatureXSAVE' is present.
   197  const (
   198  	X86FeatureXSAVEOPT Feature = 4*32 + iota
   199  	X86FeatureXSAVEC
   200  	X86FeatureXGETBV1
   201  	X86FeatureXSAVES
   202  	// EAX[31:4] are reserved.
   203  )
   204  
   205  // Block 5 constants are the extended feature bits in
   206  // CPUID.(EAX=0x80000001):ECX.
   207  const (
   208  	X86FeatureLAHF64 Feature = 5*32 + iota
   209  	X86FeatureCMP_LEGACY
   210  	X86FeatureSVM
   211  	X86FeatureEXTAPIC
   212  	X86FeatureCR8_LEGACY
   213  	X86FeatureLZCNT
   214  	X86FeatureSSE4A
   215  	X86FeatureMISALIGNSSE
   216  	X86FeaturePREFETCHW
   217  	X86FeatureOSVW
   218  	X86FeatureIBS
   219  	X86FeatureXOP
   220  	X86FeatureSKINIT
   221  	X86FeatureWDT
   222  	_ // ecx bit 14 is reserved.
   223  	X86FeatureLWP
   224  	X86FeatureFMA4
   225  	X86FeatureTCE
   226  	_ // ecx bit 18 is reserved.
   227  	_ // ecx bit 19 is reserved.
   228  	_ // ecx bit 20 is reserved.
   229  	X86FeatureTBM
   230  	X86FeatureTOPOLOGY
   231  	X86FeaturePERFCTR_CORE
   232  	X86FeaturePERFCTR_NB
   233  	_ // ecx bit 25 is reserved.
   234  	X86FeatureBPEXT
   235  	X86FeaturePERFCTR_TSC
   236  	X86FeaturePERFCTR_LLC
   237  	X86FeatureMWAITX
   238  	// TODO(b/152776797): Some CPUs set this but it is not documented anywhere.
   239  	X86FeatureBlock5Bit30
   240  	_ // ecx bit 31 is reserved.
   241  )
   242  
   243  // Block 6 constants are the extended feature bits in
   244  // CPUID.(EAX=0x80000001):EDX.
   245  //
   246  // These are sparse, and so the bit positions are assigned manually.
   247  const (
   248  	// On AMD, EDX[24:23] | EDX[17:12] | EDX[9:0] are duplicate features
   249  	// also defined in block 1 (in identical bit positions). Those features
   250  	// are not listed here.
   251  	block6DuplicateMask = 0x183f3ff
   252  
   253  	X86FeatureSYSCALL  Feature = 6*32 + 11
   254  	X86FeatureNX       Feature = 6*32 + 20
   255  	X86FeatureMMXEXT   Feature = 6*32 + 22
   256  	X86FeatureFXSR_OPT Feature = 6*32 + 25
   257  	X86FeatureGBPAGES  Feature = 6*32 + 26
   258  	X86FeatureRDTSCP   Feature = 6*32 + 27
   259  	X86FeatureLM       Feature = 6*32 + 29
   260  	X86Feature3DNOWEXT Feature = 6*32 + 30
   261  	X86Feature3DNOW    Feature = 6*32 + 31
   262  )
   263  
   264  // linuxBlockOrder defines the order in which linux organizes the feature
   265  // blocks. Linux also tracks feature bits in 32-bit blocks, but in an order
   266  // which doesn't match well here, so for the /proc/cpuinfo generation we simply
   267  // re-map the blocks to Linux's ordering and then go through the bits in each
   268  // block.
   269  var linuxBlockOrder = []block{1, 6, 0, 5, 2, 4, 3}
   270  
// x86FeatureStrings maps each printable Feature to its flag name. To make
// emulation of /proc/cpuinfo easy, these names match the names of the basic
// features in Linux defined in arch/x86/kernel/cpu/capflags.c.
//
// Features listed here are returned by flagString regardless of its
// cpuinfoOnly argument; parse-only names live in x86FeatureParseOnlyStrings.
var x86FeatureStrings = map[Feature]string{
	// Block 0: CPUID.(EAX=1):ECX.
	X86FeatureSSE3:     "pni",
	X86FeaturePCLMULDQ: "pclmulqdq",
	X86FeatureDTES64:   "dtes64",
	X86FeatureMONITOR:  "monitor",
	X86FeatureDSCPL:    "ds_cpl",
	X86FeatureVMX:      "vmx",
	X86FeatureSMX:      "smx",
	X86FeatureEST:      "est",
	X86FeatureTM2:      "tm2",
	X86FeatureSSSE3:    "ssse3",
	X86FeatureCNXTID:   "cid",
	X86FeatureSDBG:     "sdbg",
	X86FeatureFMA:      "fma",
	X86FeatureCX16:     "cx16",
	X86FeatureXTPR:     "xtpr",
	X86FeaturePDCM:     "pdcm",
	X86FeaturePCID:     "pcid",
	X86FeatureDCA:      "dca",
	X86FeatureSSE4_1:   "sse4_1",
	X86FeatureSSE4_2:   "sse4_2",
	X86FeatureX2APIC:   "x2apic",
	X86FeatureMOVBE:    "movbe",
	X86FeaturePOPCNT:   "popcnt",
	X86FeatureTSCD:     "tsc_deadline_timer",
	X86FeatureAES:      "aes",
	X86FeatureXSAVE:    "xsave",
	X86FeatureAVX:      "avx",
	X86FeatureF16C:     "f16c",
	X86FeatureRDRAND:   "rdrand",

	// Block 1: CPUID.(EAX=1):EDX.
	X86FeatureFPU:   "fpu",
	X86FeatureVME:   "vme",
	X86FeatureDE:    "de",
	X86FeaturePSE:   "pse",
	X86FeatureTSC:   "tsc",
	X86FeatureMSR:   "msr",
	X86FeaturePAE:   "pae",
	X86FeatureMCE:   "mce",
	X86FeatureCX8:   "cx8",
	X86FeatureAPIC:  "apic",
	X86FeatureSEP:   "sep",
	X86FeatureMTRR:  "mtrr",
	X86FeaturePGE:   "pge",
	X86FeatureMCA:   "mca",
	X86FeatureCMOV:  "cmov",
	X86FeaturePAT:   "pat",
	X86FeaturePSE36: "pse36",
	X86FeaturePSN:   "pn",
	X86FeatureCLFSH: "clflush",
	X86FeatureDS:    "dts",
	X86FeatureACPI:  "acpi",
	X86FeatureMMX:   "mmx",
	X86FeatureFXSR:  "fxsr",
	X86FeatureSSE:   "sse",
	X86FeatureSSE2:  "sse2",
	X86FeatureSS:    "ss",
	X86FeatureHTT:   "ht",
	X86FeatureTM:    "tm",
	X86FeatureIA64:  "ia64",
	X86FeaturePBE:   "pbe",

	// Block 2: CPUID.(EAX=7,ECX=0):EBX.
	X86FeatureFSGSBase:   "fsgsbase",
	X86FeatureTSC_ADJUST: "tsc_adjust",
	X86FeatureBMI1:       "bmi1",
	X86FeatureHLE:        "hle",
	X86FeatureAVX2:       "avx2",
	X86FeatureSMEP:       "smep",
	X86FeatureBMI2:       "bmi2",
	X86FeatureERMS:       "erms",
	X86FeatureINVPCID:    "invpcid",
	X86FeatureRTM:        "rtm",
	X86FeatureCQM:        "cqm",
	X86FeatureMPX:        "mpx",
	X86FeatureRDT:        "rdt_a",
	X86FeatureAVX512F:    "avx512f",
	X86FeatureAVX512DQ:   "avx512dq",
	X86FeatureRDSEED:     "rdseed",
	X86FeatureADX:        "adx",
	X86FeatureSMAP:       "smap",
	X86FeatureCLWB:       "clwb",
	X86FeatureAVX512PF:   "avx512pf",
	X86FeatureAVX512ER:   "avx512er",
	X86FeatureAVX512CD:   "avx512cd",
	X86FeatureSHA:        "sha_ni",
	X86FeatureAVX512BW:   "avx512bw",
	X86FeatureAVX512VL:   "avx512vl",

	// Block 3: CPUID.(EAX=7,ECX=0):ECX.
	X86FeatureAVX512VBMI:       "avx512vbmi",
	X86FeatureUMIP:             "umip",
	X86FeaturePKU:              "pku",
	X86FeatureOSPKE:            "ospke",
	X86FeatureWAITPKG:          "waitpkg",
	X86FeatureAVX512_VBMI2:     "avx512_vbmi2",
	X86FeatureGFNI:             "gfni",
	X86FeatureVAES:             "vaes",
	X86FeatureVPCLMULQDQ:       "vpclmulqdq",
	X86FeatureAVX512_VNNI:      "avx512_vnni",
	X86FeatureAVX512_BITALG:    "avx512_bitalg",
	X86FeatureTME:              "tme",
	X86FeatureAVX512_VPOPCNTDQ: "avx512_vpopcntdq",
	X86FeatureLA57:             "la57",
	X86FeatureRDPID:            "rdpid",
	X86FeatureCLDEMOTE:         "cldemote",
	X86FeatureMOVDIRI:          "movdiri",
	X86FeatureMOVDIR64B:        "movdir64b",

	// Block 4: CPUID.(EAX=0DH,ECX=01H):EAX.
	X86FeatureXSAVEOPT: "xsaveopt",
	X86FeatureXSAVEC:   "xsavec",
	X86FeatureXGETBV1:  "xgetbv1",
	X86FeatureXSAVES:   "xsaves",

	// Block 5: CPUID.(EAX=0x80000001):ECX.
	X86FeatureLAHF64:       "lahf_lm", // LAHF/SAHF in long mode
	X86FeatureCMP_LEGACY:   "cmp_legacy",
	X86FeatureSVM:          "svm",
	X86FeatureEXTAPIC:      "extapic",
	X86FeatureCR8_LEGACY:   "cr8_legacy",
	X86FeatureLZCNT:        "abm", // Advanced bit manipulation
	X86FeatureSSE4A:        "sse4a",
	X86FeatureMISALIGNSSE:  "misalignsse",
	X86FeaturePREFETCHW:    "3dnowprefetch",
	X86FeatureOSVW:         "osvw",
	X86FeatureIBS:          "ibs",
	X86FeatureXOP:          "xop",
	X86FeatureSKINIT:       "skinit",
	X86FeatureWDT:          "wdt",
	X86FeatureLWP:          "lwp",
	X86FeatureFMA4:         "fma4",
	X86FeatureTCE:          "tce",
	X86FeatureTBM:          "tbm",
	X86FeatureTOPOLOGY:     "topoext",
	X86FeaturePERFCTR_CORE: "perfctr_core",
	X86FeaturePERFCTR_NB:   "perfctr_nb",
	X86FeatureBPEXT:        "bpext",
	X86FeaturePERFCTR_TSC:  "ptsc",
	X86FeaturePERFCTR_LLC:  "perfctr_llc",
	X86FeatureMWAITX:       "mwaitx",

	// Block 6: CPUID.(EAX=0x80000001):EDX.
	X86FeatureSYSCALL:  "syscall",
	X86FeatureNX:       "nx",
	X86FeatureMMXEXT:   "mmxext",
	X86FeatureFXSR_OPT: "fxsr_opt",
	X86FeatureGBPAGES:  "pdpe1gb",
	X86FeatureRDTSCP:   "rdtscp",
	X86FeatureLM:       "lm",
	X86Feature3DNOWEXT: "3dnowext",
	X86Feature3DNOW:    "3dnow",
}
   428  
   429  // These flags are parse only---they can be used for setting / unsetting the
   430  // flags, but will not get printed out in /proc/cpuinfo.
   431  var x86FeatureParseOnlyStrings = map[Feature]string{
   432  	// Block 0.
   433  	X86FeatureOSXSAVE: "osxsave",
   434  
   435  	// Block 2.
   436  	X86FeatureFDP_EXCPTN_ONLY: "fdp_excptn_only",
   437  	X86FeatureFPCSDS:          "fpcsds",
   438  	X86FeatureIPT:             "pt",
   439  	X86FeatureCLFLUSHOPT:      "clfushopt",
   440  
   441  	// Block 3.
   442  	X86FeaturePREFETCHWT1: "prefetchwt1",
   443  
   444  	// Block 5.
   445  	X86FeatureBlock5Bit30: "block5_bit30",
   446  }
   447  
   448  // intelCacheDescriptors describe the caches and TLBs on the system. They are
   449  // returned in the registers for eax=2. Intel only.
   450  type intelCacheDescriptor uint8
   451  
   452  // Valid cache/TLB descriptors. All descriptors can be found in Intel SDM Vol.
   453  // 2, Ch. 3.2, "CPUID", Table 3-12 "Encoding of CPUID Leaf 2 Descriptors".
   454  const (
   455  	intelNullDescriptor    intelCacheDescriptor = 0
   456  	intelNoTLBDescriptor   intelCacheDescriptor = 0xfe
   457  	intelNoCacheDescriptor intelCacheDescriptor = 0xff
   458  
   459  	// Most descriptors omitted for brevity as they are currently unused.
   460  )
   461  
   462  // CacheType describes the type of a cache, as returned in eax[4:0] for eax=4.
   463  type CacheType uint8
   464  
   465  const (
   466  	// cacheNull indicates that there are no more entries.
   467  	cacheNull CacheType = iota
   468  
   469  	// CacheData is a data cache.
   470  	CacheData
   471  
   472  	// CacheInstruction is an instruction cache.
   473  	CacheInstruction
   474  
   475  	// CacheUnified is a unified instruction and data cache.
   476  	CacheUnified
   477  )
   478  
// Cache describes the parameters of a single cache on the system.
//
// EmulateID reports these values for the intelDeterministicCacheParams
// function, indexed by position in FeatureSet.Caches.
//
// +stateify savable
type Cache struct {
	// Level is the hierarchical level of this cache (L1, L2, etc).
	Level uint32

	// Type is the type of cache (data, instruction or unified).
	Type CacheType

	// FullyAssociative indicates that entries may be placed in any block.
	FullyAssociative bool

	// Partitions is the number of physical partitions in the cache.
	Partitions uint32

	// Ways is the number of ways of associativity in the cache.
	Ways uint32

	// Sets is the number of sets in the cache.
	Sets uint32

	// InvalidateHierarchical indicates that WBINVD/INVD from threads
	// sharing this cache acts upon lower level caches for threads sharing
	// this cache.
	InvalidateHierarchical bool

	// Inclusive indicates that this cache is inclusive of lower cache
	// levels.
	Inclusive bool

	// DirectMapped indicates that this cache is directly mapped from
	// address, rather than using a hash function.
	DirectMapped bool
}
   514  
   515  // Just a way to wrap cpuid function numbers.
   516  type cpuidFunction uint32
   517  
   518  // The constants below are the lower or "standard" cpuid functions, ordered as
   519  // defined by the hardware.
   520  const (
   521  	vendorID                      cpuidFunction = iota // Returns vendor ID and largest standard function.
   522  	featureInfo                                        // Returns basic feature bits and processor signature.
   523  	intelCacheDescriptors                              // Returns list of cache descriptors. Intel only.
   524  	intelSerialNumber                                  // Returns processor serial number (obsolete on new hardware). Intel only.
   525  	intelDeterministicCacheParams                      // Returns deterministic cache information. Intel only.
   526  	monitorMwaitParams                                 // Returns information about monitor/mwait instructions.
   527  	powerParams                                        // Returns information about power management and thermal sensors.
   528  	extendedFeatureInfo                                // Returns extended feature bits.
   529  	_                                                  // Function 0x8 is reserved.
   530  	intelDCAParams                                     // Returns direct cache access information. Intel only.
   531  	intelPMCInfo                                       // Returns information about performance monitoring features. Intel only.
   532  	intelX2APICInfo                                    // Returns core/logical processor topology. Intel only.
   533  	_                                                  // Function 0xc is reserved.
   534  	xSaveInfo                                          // Returns information about extended state management.
   535  )
   536  
   537  // The "extended" functions start at 0x80000000.
   538  const (
   539  	extendedFunctionInfo cpuidFunction = 0x80000000 + iota // Returns highest available extended function in eax.
   540  	extendedFeatures                                       // Returns some extended feature bits in edx and ecx.
   541  )
   542  
   543  // These are the extended floating point state features. They are used to
   544  // enumerate floating point features in XCR0, XSTATE_BV, etc.
   545  const (
   546  	XSAVEFeatureX87         = 1 << 0
   547  	XSAVEFeatureSSE         = 1 << 1
   548  	XSAVEFeatureAVX         = 1 << 2
   549  	XSAVEFeatureBNDREGS     = 1 << 3
   550  	XSAVEFeatureBNDCSR      = 1 << 4
   551  	XSAVEFeatureAVX512op    = 1 << 5
   552  	XSAVEFeatureAVX512zmm0  = 1 << 6
   553  	XSAVEFeatureAVX512zmm16 = 1 << 7
   554  	XSAVEFeaturePKRU        = 1 << 9
   555  )
   556  
// cpuFreqMHz is the value WriteCPUInfoTo prints for both the "cpu MHz" and
// "bogomips" fields. It is not assigned anywhere in this section of the file.
// NOTE(review): presumably initialized elsewhere in the package — confirm.
var cpuFreqMHz float64
   558  
   559  // x86FeaturesFromString includes features from x86FeatureStrings and
   560  // x86FeatureParseOnlyStrings.
   561  var x86FeaturesFromString = make(map[string]Feature)
   562  
   563  // FeatureFromString returns the Feature associated with the given feature
   564  // string plus a bool to indicate if it could find the feature.
   565  func FeatureFromString(s string) (Feature, bool) {
   566  	f, b := x86FeaturesFromString[s]
   567  	return f, b
   568  }
   569  
   570  // String implements fmt.Stringer.
   571  func (f Feature) String() string {
   572  	if s := f.flagString(false); s != "" {
   573  		return s
   574  	}
   575  
   576  	block := int(f) / 32
   577  	bit := int(f) % 32
   578  	return fmt.Sprintf("<cpuflag %d; block %d bit %d>", f, block, bit)
   579  }
   580  
   581  func (f Feature) flagString(cpuinfoOnly bool) string {
   582  	if s, ok := x86FeatureStrings[f]; ok {
   583  		return s
   584  	}
   585  	if !cpuinfoOnly {
   586  		return x86FeatureParseOnlyStrings[f]
   587  	}
   588  	return ""
   589  }
   590  
// FeatureSet is a set of Features for a CPU, together with the vendor ID,
// processor signature fields and cache description that go with it.
//
// +stateify savable
type FeatureSet struct {
	// Set is the set of features that are enabled in this FeatureSet. A
	// feature counts as enabled when it maps to true.
	Set map[Feature]bool

	// VendorID is the 12-char string returned in ebx:edx:ecx for eax=0.
	VendorID string

	// ExtendedFamily is part of the processor signature (bits 27:20 of the
	// signature dword).
	ExtendedFamily uint8

	// ExtendedModel is part of the processor signature (bits 19:16 of the
	// signature dword).
	ExtendedModel uint8

	// ProcessorType is part of the processor signature (bits 13:12 of the
	// signature dword).
	ProcessorType uint8

	// Family is part of the processor signature (bits 11:8 of the
	// signature dword).
	Family uint8

	// Model is part of the processor signature (bits 7:4 of the signature
	// dword).
	Model uint8

	// SteppingID is part of the processor signature (bits 3:0 of the
	// signature dword).
	SteppingID uint8

	// Caches describes the caches on the CPU.
	Caches []Cache

	// CacheLine is the size of a cache line in bytes.
	//
	// All caches use the same line size. This is not enforced in the CPUID
	// encoding, but is true on all known x86 processors.
	CacheLine uint32
}
   628  
   629  // FlagsString prints out supported CPU flags. If cpuinfoOnly is true, it is
   630  // equivalent to the "flags" field in /proc/cpuinfo.
   631  func (fs *FeatureSet) FlagsString(cpuinfoOnly bool) string {
   632  	var s []string
   633  	for _, b := range linuxBlockOrder {
   634  		for i := 0; i < blockSize; i++ {
   635  			if f := featureID(b, i); fs.Set[f] {
   636  				if fstr := f.flagString(cpuinfoOnly); fstr != "" {
   637  					s = append(s, fstr)
   638  				}
   639  			}
   640  		}
   641  	}
   642  	return strings.Join(s, " ")
   643  }
   644  
   645  // WriteCPUInfoTo is to generate a section of one cpu in /proc/cpuinfo. This is
   646  // a minimal /proc/cpuinfo, it is missing some fields like "microcode" that are
   647  // not always printed in Linux. The bogomips field is simply made up.
   648  func (fs FeatureSet) WriteCPUInfoTo(cpu uint, b *bytes.Buffer) {
   649  	fmt.Fprintf(b, "processor\t: %d\n", cpu)
   650  	fmt.Fprintf(b, "vendor_id\t: %s\n", fs.VendorID)
   651  	fmt.Fprintf(b, "cpu family\t: %d\n", ((fs.ExtendedFamily<<4)&0xff)|fs.Family)
   652  	fmt.Fprintf(b, "model\t\t: %d\n", ((fs.ExtendedModel<<4)&0xff)|fs.Model)
   653  	fmt.Fprintf(b, "model name\t: %s\n", "unknown") // Unknown for now.
   654  	fmt.Fprintf(b, "stepping\t: %s\n", "unknown")   // Unknown for now.
   655  	fmt.Fprintf(b, "cpu MHz\t\t: %.3f\n", cpuFreqMHz)
   656  	fmt.Fprintln(b, "fpu\t\t: yes")
   657  	fmt.Fprintln(b, "fpu_exception\t: yes")
   658  	fmt.Fprintf(b, "cpuid level\t: %d\n", uint32(xSaveInfo)) // Same as ax in vendorID.
   659  	fmt.Fprintln(b, "wp\t\t: yes")
   660  	fmt.Fprintf(b, "flags\t\t: %s\n", fs.FlagsString(true))
   661  	fmt.Fprintf(b, "bogomips\t: %.02f\n", cpuFreqMHz) // It's bogus anyway.
   662  	fmt.Fprintf(b, "clflush size\t: %d\n", fs.CacheLine)
   663  	fmt.Fprintf(b, "cache_alignment\t: %d\n", fs.CacheLine)
   664  	fmt.Fprintf(b, "address sizes\t: %d bits physical, %d bits virtual\n", 46, 48)
   665  	fmt.Fprintln(b, "power management:") // This is always here, but can be blank.
   666  	fmt.Fprintln(b, "")                  // The /proc/cpuinfo file ends with an extra newline.
   667  }
   668  
// Vendor ID strings compared against FeatureSet.VendorID by the AMD and Intel
// methods.
const (
	amdVendorID   = "AuthenticAMD"
	intelVendorID = "GenuineIntel"
)
   673  
// AMD returns true if fs describes an AMD CPU, i.e. its VendorID is exactly
// amdVendorID.
func (fs *FeatureSet) AMD() bool {
	return fs.VendorID == amdVendorID
}
   678  
// Intel returns true if fs describes an Intel CPU, i.e. its VendorID is
// exactly intelVendorID.
func (fs *FeatureSet) Intel() bool {
	return fs.VendorID == intelVendorID
}
   683  
   684  // CheckHostCompatible returns nil if fs is a subset of the host feature set.
   685  func (fs *FeatureSet) CheckHostCompatible() error {
   686  	hfs := HostFeatureSet()
   687  
   688  	if diff := fs.Subtract(hfs); diff != nil {
   689  		return ErrIncompatible{fmt.Sprintf("CPU feature set %v incompatible with host feature set %v (missing: %v)", fs.FlagsString(false), hfs.FlagsString(false), diff)}
   690  	}
   691  
   692  	// The size of a cache line must match, as it is critical to correctly
   693  	// utilizing CLFLUSH. Other cache properties are allowed to change, as
   694  	// they are not important to correctness.
   695  	if fs.CacheLine != hfs.CacheLine {
   696  		return ErrIncompatible{fmt.Sprintf("CPU cache line size %d incompatible with host cache line size %d", fs.CacheLine, hfs.CacheLine)}
   697  	}
   698  
   699  	return nil
   700  }
   701  
   702  // Helper to convert 3 regs into 12-byte vendor ID.
   703  func vendorIDFromRegs(bx, cx, dx uint32) string {
   704  	bytes := make([]byte, 0, 12)
   705  	for i := uint(0); i < 4; i++ {
   706  		b := byte(bx >> (i * 8))
   707  		bytes = append(bytes, b)
   708  	}
   709  
   710  	for i := uint(0); i < 4; i++ {
   711  		b := byte(dx >> (i * 8))
   712  		bytes = append(bytes, b)
   713  	}
   714  
   715  	for i := uint(0); i < 4; i++ {
   716  		b := byte(cx >> (i * 8))
   717  		bytes = append(bytes, b)
   718  	}
   719  	return string(bytes)
   720  }
   721  
   722  var maxXsaveSize = func() uint32 {
   723  	// Leaf 0 of xsaveinfo function returns the size for currently
   724  	// enabled xsave features in ebx, the maximum size if all valid
   725  	// features are saved with xsave in ecx, and valid XCR0 bits in
   726  	// edx:eax.
   727  	//
   728  	// If xSaveInfo isn't supported, cpuid will not fault but will
   729  	// return bogus values.
   730  	_, _, maxXsaveSize, _ := HostID(uint32(xSaveInfo), 0)
   731  	return maxXsaveSize
   732  }()
   733  
   734  // ExtendedStateSize returns the number of bytes needed to save the "extended
   735  // state" for this processor and the boundary it must be aligned to. Extended
   736  // state includes floating point registers, and other cpu state that's not
   737  // associated with the normal task context.
   738  //
   739  // Note: We can save some space here with an optimization where we use a
   740  // smaller chunk of memory depending on features that are actually enabled.
   741  // Currently we just use the largest possible size for simplicity (which is
   742  // about 2.5K worst case, with avx512).
   743  func (fs *FeatureSet) ExtendedStateSize() (size, align uint) {
   744  	if fs.UseXsave() {
   745  		return uint(maxXsaveSize), 64
   746  	}
   747  
   748  	// If we don't support xsave, we fall back to fxsave, which requires
   749  	// 512 bytes aligned to 16 bytes.
   750  	return 512, 16
   751  }
   752  
   753  // ValidXCR0Mask returns the bits that may be set to 1 in control register
   754  // XCR0.
   755  func (fs *FeatureSet) ValidXCR0Mask() uint64 {
   756  	if !fs.UseXsave() {
   757  		return 0
   758  	}
   759  	eax, _, _, edx := HostID(uint32(xSaveInfo), 0)
   760  	return uint64(edx)<<32 | uint64(eax)
   761  }
   762  
   763  // vendorIDRegs returns the 3 register values used to construct the 12-byte
   764  // vendor ID string for eax=0.
   765  func (fs *FeatureSet) vendorIDRegs() (bx, dx, cx uint32) {
   766  	for i := uint(0); i < 4; i++ {
   767  		bx |= uint32(fs.VendorID[i]) << (i * 8)
   768  	}
   769  
   770  	for i := uint(0); i < 4; i++ {
   771  		dx |= uint32(fs.VendorID[i+4]) << (i * 8)
   772  	}
   773  
   774  	for i := uint(0); i < 4; i++ {
   775  		cx |= uint32(fs.VendorID[i+8]) << (i * 8)
   776  	}
   777  	return
   778  }
   779  
   780  // signature returns the signature dword that's returned in eax when eax=1.
   781  func (fs *FeatureSet) signature() uint32 {
   782  	var s uint32
   783  	s |= uint32(fs.SteppingID & 0xf)
   784  	s |= uint32(fs.Model&0xf) << 4
   785  	s |= uint32(fs.Family&0xf) << 8
   786  	s |= uint32(fs.ProcessorType&0x3) << 12
   787  	s |= uint32(fs.ExtendedModel&0xf) << 16
   788  	s |= uint32(fs.ExtendedFamily&0xff) << 20
   789  	return s
   790  }
   791  
   792  // Helper to deconstruct signature dword.
   793  func signatureSplit(v uint32) (ef, em, pt, f, m, sid uint8) {
   794  	sid = uint8(v & 0xf)
   795  	m = uint8(v>>4) & 0xf
   796  	f = uint8(v>>8) & 0xf
   797  	pt = uint8(v>>12) & 0x3
   798  	em = uint8(v>>16) & 0xf
   799  	ef = uint8(v >> 20)
   800  	return
   801  }
   802  
   803  // Helper to convert blockwise feature bit masks into a set of features. Masks
   804  // must be provided in order for each block, without skipping them. If a block
   805  // does not matter for this feature set, 0 is specified.
   806  func setFromBlockMasks(blocks ...uint32) map[Feature]bool {
   807  	s := make(map[Feature]bool)
   808  	for b, blockMask := range blocks {
   809  		for i := 0; i < blockSize; i++ {
   810  			if blockMask&1 != 0 {
   811  				s[featureID(block(b), i)] = true
   812  			}
   813  			blockMask >>= 1
   814  		}
   815  	}
   816  	return s
   817  }
   818  
   819  // blockMask returns the 32-bit mask associated with a block of features.
   820  func (fs *FeatureSet) blockMask(b block) uint32 {
   821  	var mask uint32
   822  	for i := 0; i < blockSize; i++ {
   823  		if fs.Set[featureID(b, i)] {
   824  			mask |= 1 << uint(i)
   825  		}
   826  	}
   827  	return mask
   828  }
   829  
// Remove removes a Feature from a FeatureSet. It ignores features
// that are not in the FeatureSet. The entry is deleted from Set rather than
// being set to false.
func (fs *FeatureSet) Remove(feature Feature) {
	delete(fs.Set, feature)
}
   835  
   836  // Add adds a Feature to a FeatureSet. It ignores duplicate features.
   837  func (fs *FeatureSet) Add(feature Feature) {
   838  	fs.Set[feature] = true
   839  }
   840  
// HasFeature tests whether or not a feature is in the given feature set. A
// feature that is absent from Set, or present with the value false, is
// reported as not in the set.
func (fs *FeatureSet) HasFeature(feature Feature) bool {
	return fs.Set[feature]
}
   845  
   846  // Subtract returns the features present in fs that are not present in other.
   847  // If all features in fs are present in other, Subtract returns nil.
   848  func (fs *FeatureSet) Subtract(other *FeatureSet) (diff map[Feature]bool) {
   849  	for f := range fs.Set {
   850  		if !other.Set[f] {
   851  			if diff == nil {
   852  				diff = make(map[Feature]bool)
   853  			}
   854  			diff[f] = true
   855  		}
   856  	}
   857  
   858  	return
   859  }
   860  
   861  // EmulateID emulates a cpuid instruction based on the feature set.
   862  func (fs *FeatureSet) EmulateID(origAx, origCx uint32) (ax, bx, cx, dx uint32) {
   863  	switch cpuidFunction(origAx) {
   864  	case vendorID:
   865  		ax = uint32(xSaveInfo) // 0xd (xSaveInfo) is the highest function we support.
   866  		bx, dx, cx = fs.vendorIDRegs()
   867  	case featureInfo:
   868  		// CLFLUSH line size is encoded in quadwords. Other fields in bx unsupported.
   869  		bx = (fs.CacheLine / 8) << 8
   870  		cx = fs.blockMask(block(0))
   871  		dx = fs.blockMask(block(1))
   872  		ax = fs.signature()
   873  	case intelCacheDescriptors:
   874  		if !fs.Intel() {
   875  			// Reserved on non-Intel.
   876  			return 0, 0, 0, 0
   877  		}
   878  
   879  		// "The least-significant byte in register EAX (register AL)
   880  		// will always return 01H. Software should ignore this value
   881  		// and not interpret it as an informational descriptor." - SDM
   882  		//
   883  		// We only support reporting cache parameters via
   884  		// intelDeterministicCacheParams; report as much here.
   885  		//
   886  		// We do not support exposing TLB information at all.
   887  		ax = 1 | (uint32(intelNoCacheDescriptor) << 8)
   888  	case intelDeterministicCacheParams:
   889  		if !fs.Intel() {
   890  			// Reserved on non-Intel.
   891  			return 0, 0, 0, 0
   892  		}
   893  
   894  		// cx is the index of the cache to describe.
   895  		if int(origCx) >= len(fs.Caches) {
   896  			return uint32(cacheNull), 0, 0, 0
   897  		}
   898  		c := fs.Caches[origCx]
   899  
   900  		ax = uint32(c.Type)
   901  		ax |= c.Level << 5
   902  		ax |= 1 << 8 // Always claim the cache is "self-initializing".
   903  		if c.FullyAssociative {
   904  			ax |= 1 << 9
   905  		}
   906  		// Processor topology not supported.
   907  
   908  		bx = fs.CacheLine - 1
   909  		bx |= (c.Partitions - 1) << 12
   910  		bx |= (c.Ways - 1) << 22
   911  
   912  		cx = c.Sets - 1
   913  
   914  		if !c.InvalidateHierarchical {
   915  			dx |= 1
   916  		}
   917  		if c.Inclusive {
   918  			dx |= 1 << 1
   919  		}
   920  		if !c.DirectMapped {
   921  			dx |= 1 << 2
   922  		}
   923  	case xSaveInfo:
   924  		if !fs.UseXsave() {
   925  			return 0, 0, 0, 0
   926  		}
   927  		return HostID(uint32(xSaveInfo), origCx)
   928  	case extendedFeatureInfo:
   929  		if origCx != 0 {
   930  			break // Only leaf 0 is supported.
   931  		}
   932  		bx = fs.blockMask(block(2))
   933  		cx = fs.blockMask(block(3))
   934  	case extendedFunctionInfo:
   935  		// We only support showing the extended features.
   936  		ax = uint32(extendedFeatures)
   937  		cx = 0
   938  	case extendedFeatures:
   939  		cx = fs.blockMask(block(5))
   940  		dx = fs.blockMask(block(6))
   941  		if fs.AMD() {
   942  			// AMD duplicates some block 1 features in block 6.
   943  			dx |= fs.blockMask(block(1)) & block6DuplicateMask
   944  		}
   945  	}
   946  
   947  	return
   948  }
   949  
   950  // UseXsave returns the choice of fp state saving instruction.
   951  func (fs *FeatureSet) UseXsave() bool {
   952  	return fs.HasFeature(X86FeatureXSAVE) && fs.HasFeature(X86FeatureOSXSAVE)
   953  }
   954  
   955  // UseXsaveopt returns true if 'fs' supports the "xsaveopt" instruction.
   956  func (fs *FeatureSet) UseXsaveopt() bool {
   957  	return fs.UseXsave() && fs.HasFeature(X86FeatureXSAVEOPT)
   958  }
   959  
// HostID executes a native CPUID instruction.
//
// axArg and cxArg are the eax and ecx inputs to the instruction (function and
// sub-leaf, as used by the callers in this file); the results are the eax,
// ebx, ecx and edx values it produces. The declaration has no Go body, so the
// implementation is provided outside this file (presumably in assembly).
func HostID(axArg, cxArg uint32) (ax, bx, cx, dx uint32)
   962  
   963  // HostFeatureSet uses cpuid to get host values and construct a feature set
   964  // that matches that of the host machine. Note that there are several places
   965  // where there appear to be some unnecessary assignments between register names
   966  // (ax, bx, cx, or dx) and featureBlockN variables. This is to explicitly show
   967  // where the different feature blocks come from, to make the code easier to
   968  // inspect and read.
   969  func HostFeatureSet() *FeatureSet {
   970  	// eax=0 gets max supported feature and vendor ID.
   971  	_, bx, cx, dx := HostID(0, 0)
   972  	vendorID := vendorIDFromRegs(bx, cx, dx)
   973  
   974  	// eax=1 gets basic features in ecx:edx.
   975  	ax, bx, cx, dx := HostID(1, 0)
   976  	featureBlock0 := cx
   977  	featureBlock1 := dx
   978  	ef, em, pt, f, m, sid := signatureSplit(ax)
   979  	cacheLine := 8 * (bx >> 8) & 0xff
   980  
   981  	// eax=4, ecx=i gets details about cache index i. Only supported on Intel.
   982  	var caches []Cache
   983  	if vendorID == intelVendorID {
   984  		// ecx selects the cache index until a null type is returned.
   985  		for i := uint32(0); ; i++ {
   986  			ax, bx, cx, dx := HostID(4, i)
   987  			t := CacheType(ax & 0xf)
   988  			if t == cacheNull {
   989  				break
   990  			}
   991  
   992  			lineSize := (bx & 0xfff) + 1
   993  			if lineSize != cacheLine {
   994  				panic(fmt.Sprintf("Mismatched cache line size: %d vs %d", lineSize, cacheLine))
   995  			}
   996  
   997  			caches = append(caches, Cache{
   998  				Type:                   t,
   999  				Level:                  (ax >> 5) & 0x7,
  1000  				FullyAssociative:       ((ax >> 9) & 1) == 1,
  1001  				Partitions:             ((bx >> 12) & 0x3ff) + 1,
  1002  				Ways:                   ((bx >> 22) & 0x3ff) + 1,
  1003  				Sets:                   cx + 1,
  1004  				InvalidateHierarchical: (dx & 1) == 0,
  1005  				Inclusive:              ((dx >> 1) & 1) == 1,
  1006  				DirectMapped:           ((dx >> 2) & 1) == 0,
  1007  			})
  1008  		}
  1009  	}
  1010  
  1011  	// eax=7, ecx=0 gets extended features in ecx:ebx.
  1012  	_, bx, cx, _ = HostID(7, 0)
  1013  	featureBlock2 := bx
  1014  	featureBlock3 := cx
  1015  
  1016  	// Leaf 0xd is supported only if CPUID.1:ECX.XSAVE[bit 26] is set.
  1017  	var featureBlock4 uint32
  1018  	if (featureBlock0 & (1 << 26)) != 0 {
  1019  		featureBlock4, _, _, _ = HostID(uint32(xSaveInfo), 1)
  1020  	}
  1021  
  1022  	// eax=0x80000000 gets supported extended levels. We use this to
  1023  	// determine if there are any non-zero block 4 or block 6 bits to find.
  1024  	var featureBlock5, featureBlock6 uint32
  1025  	if ax, _, _, _ := HostID(uint32(extendedFunctionInfo), 0); ax >= uint32(extendedFeatures) {
  1026  		// eax=0x80000001 gets AMD added feature bits.
  1027  		_, _, cx, dx = HostID(uint32(extendedFeatures), 0)
  1028  		featureBlock5 = cx
  1029  		// Ignore features duplicated from block 1 on AMD. These bits
  1030  		// are reserved on Intel.
  1031  		featureBlock6 = dx &^ block6DuplicateMask
  1032  	}
  1033  
  1034  	set := setFromBlockMasks(featureBlock0, featureBlock1, featureBlock2, featureBlock3, featureBlock4, featureBlock5, featureBlock6)
  1035  	return &FeatureSet{
  1036  		Set:            set,
  1037  		VendorID:       vendorID,
  1038  		ExtendedFamily: ef,
  1039  		ExtendedModel:  em,
  1040  		ProcessorType:  pt,
  1041  		Family:         f,
  1042  		Model:          m,
  1043  		SteppingID:     sid,
  1044  		CacheLine:      cacheLine,
  1045  		Caches:         caches,
  1046  	}
  1047  }
  1048  
  1049  // Reads max cpu frequency from host /proc/cpuinfo. Must run before syscall
  1050  // filter installation. This value is used to create the fake /proc/cpuinfo
  1051  // from a FeatureSet.
  1052  func initCPUFreq() {
  1053  	cpuinfob, err := ioutil.ReadFile("/proc/cpuinfo")
  1054  	if err != nil {
  1055  		// Leave it as 0... The standalone VDSO bails out in the same
  1056  		// way.
  1057  		log.Warningf("Could not read /proc/cpuinfo: %v", err)
  1058  		return
  1059  	}
  1060  	cpuinfo := string(cpuinfob)
  1061  
  1062  	// We get the value straight from host /proc/cpuinfo. On machines with
  1063  	// frequency scaling enabled, this will only get the current value
  1064  	// which will likely be inaccurate. This is fine on machines with
  1065  	// frequency scaling disabled.
  1066  	for _, line := range strings.Split(cpuinfo, "\n") {
  1067  		if strings.Contains(line, "cpu MHz") {
  1068  			splitMHz := strings.Split(line, ":")
  1069  			if len(splitMHz) < 2 {
  1070  				log.Warningf("Could not read /proc/cpuinfo: malformed cpu MHz line")
  1071  				return
  1072  			}
  1073  
  1074  			// If there was a problem, leave cpuFreqMHz as 0.
  1075  			var err error
  1076  			cpuFreqMHz, err = strconv.ParseFloat(strings.TrimSpace(splitMHz[1]), 64)
  1077  			if err != nil {
  1078  				log.Warningf("Could not parse cpu MHz value %v: %v", splitMHz[1], err)
  1079  				cpuFreqMHz = 0
  1080  				return
  1081  			}
  1082  			return
  1083  		}
  1084  	}
  1085  	log.Warningf("Could not parse /proc/cpuinfo, it is empty or does not contain cpu MHz")
  1086  }
  1087  
  1088  func initFeaturesFromString() {
  1089  	for f, s := range x86FeatureStrings {
  1090  		x86FeaturesFromString[s] = f
  1091  	}
  1092  	for f, s := range x86FeatureParseOnlyStrings {
  1093  		x86FeaturesFromString[s] = f
  1094  	}
  1095  }
  1096  
  1097  func init() {
  1098  	initCPUFreq()
  1099  	initFeaturesFromString()
  1100  }