github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/libraries/klauspost/cpuid/private/cpuid.go (about) 1 // Generated, DO NOT EDIT, 2 // but copy it to your own project and rename the package. 3 // See more at http://yougam/libraries/klauspost/cpuid 4 5 package cpuid 6 7 import ( 8 "strings" 9 ) 10 11 // Vendor is a representation of a CPU vendor. 12 type vendor int 13 14 const ( 15 other vendor = iota 16 intel 17 amd 18 via 19 transmeta 20 nsc 21 kvm // Kernel-based Virtual Machine 22 msvm // Microsoft Hyper-V or Windows Virtual PC 23 vmware 24 xenhvm 25 ) 26 27 const ( 28 cmov = 1 << iota // i686 CMOV 29 nx // NX (No-Execute) bit 30 amd3dnow // AMD 3DNOW 31 amd3dnowext // AMD 3DNowExt 32 mmx // standard MMX 33 mmxext // SSE integer functions or AMD MMX ext 34 sse // SSE functions 35 sse2 // P4 SSE functions 36 sse3 // Prescott SSE3 functions 37 ssse3 // Conroe SSSE3 functions 38 sse4 // Penryn SSE4.1 functions 39 sse4a // AMD Barcelona microarchitecture SSE4a instructions 40 sse42 // Nehalem SSE4.2 functions 41 avx // AVX functions 42 avx2 // AVX2 functions 43 fma3 // Intel FMA 3 44 fma4 // Bulldozer FMA4 functions 45 xop // Bulldozer XOP functions 46 f16c // Half-precision floating-point conversion 47 bmi1 // Bit Manipulation Instruction Set 1 48 bmi2 // Bit Manipulation Instruction Set 2 49 tbm // AMD Trailing Bit Manipulation 50 lzcnt // LZCNT instruction 51 popcnt // POPCNT instruction 52 aesni // Advanced Encryption Standard New Instructions 53 clmul // Carry-less Multiplication 54 htt // Hyperthreading (enabled) 55 hle // Hardware Lock Elision 56 rtm // Restricted Transactional Memory 57 rdrand // RDRAND instruction is available 58 rdseed // RDSEED instruction is available 59 adx // Intel ADX (Multi-Precision Add-Carry Instruction Extensions) 60 sha // Intel SHA Extensions 61 avx512f // AVX-512 Foundation 62 avx512dq // AVX-512 Doubleword and Quadword Instructions 63 avx512ifma // AVX-512 Integer Fused Multiply-Add Instructions 64 avx512pf // AVX-512 Prefetch Instructions 65 avx512er // AVX-512 Exponential and Reciprocal Instructions 66 avx512cd // AVX-512 Conflict Detection Instructions 67 avx512bw // AVX-512 Byte and Word Instructions 68 avx512vl // AVX-512 Vector Length Extensions 69 avx512vbmi // AVX-512 Vector Bit Manipulation Instructions 70 mpx // Intel MPX (Memory Protection Extensions) 71 erms // Enhanced REP MOVSB/STOSB 72 rdtscp // RDTSCP Instruction 73 cx16 // CMPXCHG16B Instruction 74 75 // Performance indicators 76 sse2slow // SSE2 is supported, but usually not faster 77 sse3slow // SSE3 is supported, but usually not faster 78 atom // Atom processor, some SSSE3 instructions are slower 79 ) 80 81 var flagNames = map[flags]string{ 82 cmov: "CMOV", // i686 CMOV 83 nx: "NX", // NX (No-Execute) bit 84 amd3dnow: "AMD3DNOW", // AMD 3DNOW 85 amd3dnowext: "AMD3DNOWEXT", // AMD 3DNowExt 86 mmx: "MMX", // Standard MMX 87 mmxext: "MMXEXT", // SSE integer functions or AMD MMX ext 88 sse: "SSE", // SSE functions 89 sse2: "SSE2", // P4 SSE2 functions 90 sse3: "SSE3", // Prescott SSE3 functions 91 ssse3: "SSSE3", // Conroe SSSE3 functions 92 sse4: "SSE4.1", // Penryn SSE4.1 functions 93 sse4a: "SSE4A", // AMD Barcelona microarchitecture SSE4a instructions 94 sse42: "SSE4.2", // Nehalem SSE4.2 functions 95 avx: "AVX", // AVX functions 96 avx2: "AVX2", // AVX functions 97 fma3: "FMA3", // Intel FMA 3 98 fma4: "FMA4", // Bulldozer FMA4 functions 99 xop: "XOP", // Bulldozer XOP functions 100 f16c: "F16C", // Half-precision floating-point conversion 101 bmi1: "BMI1", // Bit Manipulation Instruction Set 1 102 bmi2: "BMI2", // Bit Manipulation Instruction Set 2 103 tbm: "TBM", // AMD Trailing Bit Manipulation 104 lzcnt: "LZCNT", // LZCNT instruction 105 popcnt: "POPCNT", // POPCNT instruction 106 aesni: "AESNI", // Advanced Encryption Standard New Instructions 107 clmul: "CLMUL", // Carry-less Multiplication 108 htt: "HTT", // Hyperthreading (enabled) 109 hle: "HLE", // Hardware Lock Elision 110 rtm: "RTM", // Restricted Transactional Memory 111 rdrand: "RDRAND", // RDRAND instruction is available 112 rdseed: "RDSEED", // RDSEED instruction is available 113 adx: "ADX", // Intel ADX (Multi-Precision Add-Carry Instruction Extensions) 114 sha: "SHA", // Intel SHA Extensions 115 avx512f: "AVX512F", // AVX-512 Foundation 116 avx512dq: "AVX512DQ", // AVX-512 Doubleword and Quadword Instructions 117 avx512ifma: "AVX512IFMA", // AVX-512 Integer Fused Multiply-Add Instructions 118 avx512pf: "AVX512PF", // AVX-512 Prefetch Instructions 119 avx512er: "AVX512ER", // AVX-512 Exponential and Reciprocal Instructions 120 avx512cd: "AVX512CD", // AVX-512 Conflict Detection Instructions 121 avx512bw: "AVX512BW", // AVX-512 Byte and Word Instructions 122 avx512vl: "AVX512VL", // AVX-512 Vector Length Extensions 123 avx512vbmi: "AVX512VBMI", // AVX-512 Vector Bit Manipulation Instructions 124 mpx: "MPX", // Intel MPX (Memory Protection Extensions) 125 erms: "ERMS", // Enhanced REP MOVSB/STOSB 126 rdtscp: "RDTSCP", // RDTSCP Instruction 127 cx16: "CX16", // CMPXCHG16B Instruction 128 129 // Performance indicators 130 sse2slow: "SSE2SLOW", // SSE2 supported, but usually not faster 131 sse3slow: "SSE3SLOW", // SSE3 supported, but usually not faster 132 atom: "ATOM", // Atom processor, some SSSE3 instructions are slower 133 134 } 135 136 // CPUInfo contains information about the detected system CPU. 137 type cpuInfo struct { 138 brandname string // Brand name reported by the CPU 139 vendorid vendor // Comparable CPU vendor ID 140 features flags // Features of the CPU 141 physicalcores int // Number of physical processor cores in your CPU. Will be 0 if undetectable. 142 threadspercore int // Number of threads per physical core. Will be 1 if undetectable. 143 logicalcores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable. 144 family int // CPU family number 145 model int // CPU model number 146 cacheline int // Cache line size in bytes. Will be 0 if undetectable. 147 cache struct { 148 l1i int // L1 Instruction Cache (per core or shared). Will be -1 if undetected 149 l1d int // L1 Data Cache (per core or shared). Will be -1 if undetected 150 l2 int // L2 Cache (per core or shared). Will be -1 if undetected 151 l3 int // L3 Instruction Cache (per core or shared). Will be -1 if undetected 152 } 153 maxFunc uint32 154 maxExFunc uint32 155 } 156 157 var cpuid func(op uint32) (eax, ebx, ecx, edx uint32) 158 var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32) 159 var xgetbv func(index uint32) (eax, edx uint32) 160 var rdtscpAsm func() (eax, ebx, ecx, edx uint32) 161 162 // CPU contains information about the CPU as detected on startup, 163 // or when Detect last was called. 164 // 165 // Use this as the primary entry point to you data, 166 // this way queries are 167 var cpu cpuInfo 168 169 func init() { 170 initCPU() 171 detect() 172 } 173 174 // Detect will re-detect current CPU info. 175 // This will replace the content of the exported CPU variable. 176 // 177 // Unless you expect the CPU to change while you are running your program 178 // you should not need to call this function. 179 // If you call this, you must ensure that no other goroutine is accessing the 180 // exported CPU variable. 181 func detect() { 182 cpu.maxFunc = maxFunctionID() 183 cpu.maxExFunc = maxExtendedFunction() 184 cpu.brandname = brandName() 185 cpu.cacheline = cacheLine() 186 cpu.family, cpu.model = familyModel() 187 cpu.features = support() 188 cpu.threadspercore = threadsPerCore() 189 cpu.logicalcores = logicalCores() 190 cpu.physicalcores = physicalCores() 191 cpu.vendorid = vendorID() 192 cpu.cacheSize() 193 } 194 195 // Generated here: http://play.yougam/libraries/p/BxFH2Gdc0G 196 197 // Cmov indicates support of CMOV instructions 198 func (c cpuInfo) cmov() bool { 199 return c.features&cmov != 0 200 } 201 202 // Amd3dnow indicates support of AMD 3DNOW! instructions 203 func (c cpuInfo) amd3dnow() bool { 204 return c.features&amd3dnow != 0 205 } 206 207 // Amd3dnowExt indicates support of AMD 3DNOW! Extended instructions 208 func (c cpuInfo) amd3dnowext() bool { 209 return c.features&amd3dnowext != 0 210 } 211 212 // MMX indicates support of MMX instructions 213 func (c cpuInfo) mmx() bool { 214 return c.features&mmx != 0 215 } 216 217 // MMXExt indicates support of MMXEXT instructions 218 // (SSE integer functions or AMD MMX ext) 219 func (c cpuInfo) mmxext() bool { 220 return c.features&mmxext != 0 221 } 222 223 // SSE indicates support of SSE instructions 224 func (c cpuInfo) sse() bool { 225 return c.features&sse != 0 226 } 227 228 // SSE2 indicates support of SSE 2 instructions 229 func (c cpuInfo) sse2() bool { 230 return c.features&sse2 != 0 231 } 232 233 // SSE3 indicates support of SSE 3 instructions 234 func (c cpuInfo) sse3() bool { 235 return c.features&sse3 != 0 236 } 237 238 // SSSE3 indicates support of SSSE 3 instructions 239 func (c cpuInfo) ssse3() bool { 240 return c.features&ssse3 != 0 241 } 242 243 // SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions 244 func (c cpuInfo) sse4() bool { 245 return c.features&sse4 != 0 246 } 247 248 // SSE42 indicates support of SSE4.2 instructions 249 func (c cpuInfo) sse42() bool { 250 return c.features&sse42 != 0 251 } 252 253 // AVX indicates support of AVX instructions 254 // and operating system support of AVX instructions 255 func (c cpuInfo) avx() bool { 256 return c.features&avx != 0 257 } 258 259 // AVX2 indicates support of AVX2 instructions 260 func (c cpuInfo) avx2() bool { 261 return c.features&avx2 != 0 262 } 263 264 // FMA3 indicates support of FMA3 instructions 265 func (c cpuInfo) fma3() bool { 266 return c.features&fma3 != 0 267 } 268 269 // FMA4 indicates support of FMA4 instructions 270 func (c cpuInfo) fma4() bool { 271 return c.features&fma4 != 0 272 } 273 274 // XOP indicates support of XOP instructions 275 func (c cpuInfo) xop() bool { 276 return c.features&xop != 0 277 } 278 279 // F16C indicates support of F16C instructions 280 func (c cpuInfo) f16c() bool { 281 return c.features&f16c != 0 282 } 283 284 // BMI1 indicates support of BMI1 instructions 285 func (c cpuInfo) bmi1() bool { 286 return c.features&bmi1 != 0 287 } 288 289 // BMI2 indicates support of BMI2 instructions 290 func (c cpuInfo) bmi2() bool { 291 return c.features&bmi2 != 0 292 } 293 294 // TBM indicates support of TBM instructions 295 // (AMD Trailing Bit Manipulation) 296 func (c cpuInfo) tbm() bool { 297 return c.features&tbm != 0 298 } 299 300 // Lzcnt indicates support of LZCNT instruction 301 func (c cpuInfo) lzcnt() bool { 302 return c.features&lzcnt != 0 303 } 304 305 // Popcnt indicates support of POPCNT instruction 306 func (c cpuInfo) popcnt() bool { 307 return c.features&popcnt != 0 308 } 309 310 // HTT indicates the processor has Hyperthreading enabled 311 func (c cpuInfo) htt() bool { 312 return c.features&htt != 0 313 } 314 315 // SSE2Slow indicates that SSE2 may be slow on this processor 316 func (c cpuInfo) sse2slow() bool { 317 return c.features&sse2slow != 0 318 } 319 320 // SSE3Slow indicates that SSE3 may be slow on this processor 321 func (c cpuInfo) sse3slow() bool { 322 return c.features&sse3slow != 0 323 } 324 325 // AesNi indicates support of AES-NI instructions 326 // (Advanced Encryption Standard New Instructions) 327 func (c cpuInfo) aesni() bool { 328 return c.features&aesni != 0 329 } 330 331 // Clmul indicates support of CLMUL instructions 332 // (Carry-less Multiplication) 333 func (c cpuInfo) clmul() bool { 334 return c.features&clmul != 0 335 } 336 337 // NX indicates support of NX (No-Execute) bit 338 func (c cpuInfo) nx() bool { 339 return c.features&nx != 0 340 } 341 342 // SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions 343 func (c cpuInfo) sse4a() bool { 344 return c.features&sse4a != 0 345 } 346 347 // HLE indicates support of Hardware Lock Elision 348 func (c cpuInfo) hle() bool { 349 return c.features&hle != 0 350 } 351 352 // RTM indicates support of Restricted Transactional Memory 353 func (c cpuInfo) rtm() bool { 354 return c.features&rtm != 0 355 } 356 357 // Rdrand indicates support of RDRAND instruction is available 358 func (c cpuInfo) rdrand() bool { 359 return c.features&rdrand != 0 360 } 361 362 // Rdseed indicates support of RDSEED instruction is available 363 func (c cpuInfo) rdseed() bool { 364 return c.features&rdseed != 0 365 } 366 367 // ADX indicates support of Intel ADX (Multi-Precision Add-Carry Instruction Extensions) 368 func (c cpuInfo) adx() bool { 369 return c.features&adx != 0 370 } 371 372 // SHA indicates support of Intel SHA Extensions 373 func (c cpuInfo) sha() bool { 374 return c.features&sha != 0 375 } 376 377 // AVX512F indicates support of AVX-512 Foundation 378 func (c cpuInfo) avx512f() bool { 379 return c.features&avx512f != 0 380 } 381 382 // AVX512DQ indicates support of AVX-512 Doubleword and Quadword Instructions 383 func (c cpuInfo) avx512dq() bool { 384 return c.features&avx512dq != 0 385 } 386 387 // AVX512IFMA indicates support of AVX-512 Integer Fused Multiply-Add Instructions 388 func (c cpuInfo) avx512ifma() bool { 389 return c.features&avx512ifma != 0 390 } 391 392 // AVX512PF indicates support of AVX-512 Prefetch Instructions 393 func (c cpuInfo) avx512pf() bool { 394 return c.features&avx512pf != 0 395 } 396 397 // AVX512ER indicates support of AVX-512 Exponential and Reciprocal Instructions 398 func (c cpuInfo) avx512er() bool { 399 return c.features&avx512er != 0 400 } 401 402 // AVX512CD indicates support of AVX-512 Conflict Detection Instructions 403 func (c cpuInfo) avx512cd() bool { 404 return c.features&avx512cd != 0 405 } 406 407 // AVX512BW indicates support of AVX-512 Byte and Word Instructions 408 func (c cpuInfo) avx512bw() bool { 409 return c.features&avx512bw != 0 410 } 411 412 // AVX512VL indicates support of AVX-512 Vector Length Extensions 413 func (c cpuInfo) avx512vl() bool { 414 return c.features&avx512vl != 0 415 } 416 417 // AVX512VBMI indicates support of AVX-512 Vector Bit Manipulation Instructions 418 func (c cpuInfo) avx512vbmi() bool { 419 return c.features&avx512vbmi != 0 420 } 421 422 // MPX indicates support of Intel MPX (Memory Protection Extensions) 423 func (c cpuInfo) mpx() bool { 424 return c.features&mpx != 0 425 } 426 427 // ERMS indicates support of Enhanced REP MOVSB/STOSB 428 func (c cpuInfo) erms() bool { 429 return c.features&erms != 0 430 } 431 432 func (c cpuInfo) rdtscp() bool { 433 return c.features&rdtscp != 0 434 } 435 436 func (c cpuInfo) cx16() bool { 437 return c.features&cx16 != 0 438 } 439 440 // Atom indicates an Atom processor 441 func (c cpuInfo) atom() bool { 442 return c.features&atom != 0 443 } 444 445 // Intel returns true if vendor is recognized as Intel 446 func (c cpuInfo) intel() bool { 447 return c.vendorid == intel 448 } 449 450 // AMD returns true if vendor is recognized as AMD 451 func (c cpuInfo) amd() bool { 452 return c.vendorid == amd 453 } 454 455 // Transmeta returns true if vendor is recognized as Transmeta 456 func (c cpuInfo) transmeta() bool { 457 return c.vendorid == transmeta 458 } 459 460 // NSC returns true if vendor is recognized as National Semiconductor 461 func (c cpuInfo) nsc() bool { 462 return c.vendorid == nsc 463 } 464 465 // VIA returns true if vendor is recognized as VIA 466 func (c cpuInfo) via() bool { 467 return c.vendorid == via 468 } 469 470 // RTCounter returns the 64-bit time-stamp counter 471 // Uses the RDTSCP instruction. The value 0 is returned 472 // if the CPU does not support the instruction. 473 func (c cpuInfo) rtcounter() uint64 { 474 if !c.rdtscp() { 475 return 0 476 } 477 a, _, _, d := rdtscpAsm() 478 return uint64(a) | (uint64(d) << 32) 479 } 480 481 // Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP. 482 // This variable is OS dependent, but on Linux contains information 483 // about the current cpu/core the code is running on. 484 // If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned. 485 func (c cpuInfo) ia32tscaux() uint32 { 486 if !c.rdtscp() { 487 return 0 488 } 489 _, _, ecx, _ := rdtscpAsm() 490 return ecx 491 } 492 493 // LogicalCPU will return the Logical CPU the code is currently executing on. 494 // This is likely to change when the OS re-schedules the running thread 495 // to another CPU. 496 // If the current core cannot be detected, -1 will be returned. 497 func (c cpuInfo) logicalcpu() int { 498 if c.maxFunc < 1 { 499 return -1 500 } 501 _, ebx, _, _ := cpuid(1) 502 return int(ebx >> 24) 503 } 504 505 // VM Will return true if the cpu id indicates we are in 506 // a virtual machine. This is only a hint, and will very likely 507 // have many false negatives. 508 func (c cpuInfo) vm() bool { 509 switch c.vendorid { 510 case msvm, kvm, vmware, xenhvm: 511 return true 512 } 513 return false 514 } 515 516 // Flags contains detected cpu features and caracteristics 517 type flags uint64 518 519 // String returns a string representation of the detected 520 // CPU features. 521 func (f flags) String() string { 522 return strings.Join(f.strings(), ",") 523 } 524 525 // Strings returns and array of the detected features. 526 func (f flags) strings() []string { 527 s := support() 528 r := make([]string, 0, 20) 529 for i := uint(0); i < 64; i++ { 530 key := flags(1 << i) 531 val := flagNames[key] 532 if s&key != 0 { 533 r = append(r, val) 534 } 535 } 536 return r 537 } 538 539 func maxExtendedFunction() uint32 { 540 eax, _, _, _ := cpuid(0x80000000) 541 return eax 542 } 543 544 func maxFunctionID() uint32 { 545 a, _, _, _ := cpuid(0) 546 return a 547 } 548 549 func brandName() string { 550 if maxExtendedFunction() >= 0x80000004 { 551 v := make([]uint32, 0, 48) 552 for i := uint32(0); i < 3; i++ { 553 a, b, c, d := cpuid(0x80000002 + i) 554 v = append(v, a, b, c, d) 555 } 556 return strings.Trim(string(valAsString(v...)), " ") 557 } 558 return "unknown" 559 } 560 561 func threadsPerCore() int { 562 mfi := maxFunctionID() 563 if mfi < 0x4 || vendorID() != intel { 564 return 1 565 } 566 567 if mfi < 0xb { 568 _, b, _, d := cpuid(1) 569 if (d & (1 << 28)) != 0 { 570 // v will contain logical core count 571 v := (b >> 16) & 255 572 if v > 1 { 573 a4, _, _, _ := cpuid(4) 574 // physical cores 575 v2 := (a4 >> 26) + 1 576 if v2 > 0 { 577 return int(v) / int(v2) 578 } 579 } 580 } 581 return 1 582 } 583 _, b, _, _ := cpuidex(0xb, 0) 584 if b&0xffff == 0 { 585 return 1 586 } 587 return int(b & 0xffff) 588 } 589 590 func logicalCores() int { 591 mfi := maxFunctionID() 592 switch vendorID() { 593 case intel: 594 // Use this on old Intel processors 595 if mfi < 0xb { 596 if mfi < 1 { 597 return 0 598 } 599 // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID) 600 // that can be assigned to logical processors in a physical package. 601 // The value may not be the same as the number of logical processors that are present in the hardware of a physical package. 602 _, ebx, _, _ := cpuid(1) 603 logical := (ebx >> 16) & 0xff 604 return int(logical) 605 } 606 _, b, _, _ := cpuidex(0xb, 1) 607 return int(b & 0xffff) 608 case amd: 609 _, b, _, _ := cpuid(1) 610 return int((b >> 16) & 0xff) 611 default: 612 return 0 613 } 614 } 615 616 func familyModel() (int, int) { 617 if maxFunctionID() < 0x1 { 618 return 0, 0 619 } 620 eax, _, _, _ := cpuid(1) 621 family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff) 622 model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0) 623 return int(family), int(model) 624 } 625 626 func physicalCores() int { 627 switch vendorID() { 628 case intel: 629 return logicalCores() / threadsPerCore() 630 case amd: 631 if maxExtendedFunction() >= 0x80000008 { 632 _, _, c, _ := cpuid(0x80000008) 633 return int(c&0xff) + 1 634 } 635 } 636 return 0 637 } 638 639 // Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID 640 var vendorMapping = map[string]vendor{ 641 "AMDisbetter!": amd, 642 "AuthenticAMD": amd, 643 "CentaurHauls": via, 644 "GenuineIntel": intel, 645 "TransmetaCPU": transmeta, 646 "GenuineTMx86": transmeta, 647 "Geode by NSC": nsc, 648 "VIA VIA VIA ": via, 649 "KVMKVMKVMKVM": kvm, 650 "Microsoft Hv": msvm, 651 "VMwareVMware": vmware, 652 "XenVMMXenVMM": xenhvm, 653 } 654 655 func vendorID() vendor { 656 _, b, c, d := cpuid(0) 657 v := valAsString(b, d, c) 658 vend, ok := vendorMapping[string(v)] 659 if !ok { 660 return other 661 } 662 return vend 663 } 664 665 func cacheLine() int { 666 if maxFunctionID() < 0x1 { 667 return 0 668 } 669 670 _, ebx, _, _ := cpuid(1) 671 cache := (ebx & 0xff00) >> 5 // cflush size 672 if cache == 0 && maxExtendedFunction() >= 0x80000006 { 673 _, _, ecx, _ := cpuid(0x80000006) 674 cache = ecx & 0xff // cacheline size 675 } 676 // TODO: Read from Cache and TLB Information 677 return int(cache) 678 } 679 680 func (c *cpuInfo) cacheSize() { 681 c.cache.l1d = -1 682 c.cache.l1i = -1 683 c.cache.l2 = -1 684 c.cache.l3 = -1 685 vendor := vendorID() 686 switch vendor { 687 case intel: 688 if maxFunctionID() < 4 { 689 return 690 } 691 for i := uint32(0); ; i++ { 692 eax, ebx, ecx, _ := cpuidex(4, i) 693 cacheType := eax & 15 694 if cacheType == 0 { 695 break 696 } 697 cacheLevel := (eax >> 5) & 7 698 coherency := int(ebx&0xfff) + 1 699 partitions := int((ebx>>12)&0x3ff) + 1 700 associativity := int((ebx>>22)&0x3ff) + 1 701 sets := int(ecx) + 1 702 size := associativity * partitions * coherency * sets 703 switch cacheLevel { 704 case 1: 705 if cacheType == 1 { 706 // 1 = Data Cache 707 c.cache.l1d = size 708 } else if cacheType == 2 { 709 // 2 = Instruction Cache 710 c.cache.l1i = size 711 } else { 712 if c.cache.l1d < 0 { 713 c.cache.l1i = size 714 } 715 if c.cache.l1i < 0 { 716 c.cache.l1i = size 717 } 718 } 719 case 2: 720 c.cache.l2 = size 721 case 3: 722 c.cache.l3 = size 723 } 724 } 725 case amd: 726 // Untested. 727 if maxExtendedFunction() < 0x80000005 { 728 return 729 } 730 _, _, ecx, edx := cpuid(0x80000005) 731 c.cache.l1d = int(((ecx >> 24) & 0xFF) * 1024) 732 c.cache.l1i = int(((edx >> 24) & 0xFF) * 1024) 733 734 if maxExtendedFunction() < 0x80000006 { 735 return 736 } 737 _, _, ecx, _ = cpuid(0x80000006) 738 c.cache.l2 = int(((ecx >> 16) & 0xFFFF) * 1024) 739 } 740 741 return 742 } 743 744 func support() flags { 745 mfi := maxFunctionID() 746 vend := vendorID() 747 if mfi < 0x1 { 748 return 0 749 } 750 rval := uint64(0) 751 _, _, c, d := cpuid(1) 752 if (d & (1 << 15)) != 0 { 753 rval |= cmov 754 } 755 if (d & (1 << 23)) != 0 { 756 rval |= mmx 757 } 758 if (d & (1 << 25)) != 0 { 759 rval |= mmxext 760 } 761 if (d & (1 << 25)) != 0 { 762 rval |= sse 763 } 764 if (d & (1 << 26)) != 0 { 765 rval |= sse2 766 } 767 if (c & 1) != 0 { 768 rval |= sse3 769 } 770 if (c & 0x00000200) != 0 { 771 rval |= ssse3 772 } 773 if (c & 0x00080000) != 0 { 774 rval |= sse4 775 } 776 if (c & 0x00100000) != 0 { 777 rval |= sse42 778 } 779 if (c & (1 << 25)) != 0 { 780 rval |= aesni 781 } 782 if (c & (1 << 1)) != 0 { 783 rval |= clmul 784 } 785 if c&(1<<23) != 0 { 786 rval |= popcnt 787 } 788 if c&(1<<30) != 0 { 789 rval |= rdrand 790 } 791 if c&(1<<29) != 0 { 792 rval |= f16c 793 } 794 if c&(1<<13) != 0 { 795 rval |= cx16 796 } 797 if vend == intel && (d&(1<<28)) != 0 && mfi >= 4 { 798 if threadsPerCore() > 1 { 799 rval |= htt 800 } 801 } 802 803 // Check XGETBV, OXSAVE and AVX bits 804 if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 { 805 // Check for OS support 806 eax, _ := xgetbv(0) 807 if (eax & 0x6) == 0x6 { 808 rval |= avx 809 if (c & 0x00001000) != 0 { 810 rval |= fma3 811 } 812 } 813 } 814 815 // Check AVX2, AVX2 requires OS support, but BMI1/2 don't. 816 if mfi >= 7 { 817 _, ebx, ecx, _ := cpuidex(7, 0) 818 if (rval&avx) != 0 && (ebx&0x00000020) != 0 { 819 rval |= avx2 820 } 821 if (ebx & 0x00000008) != 0 { 822 rval |= bmi1 823 if (ebx & 0x00000100) != 0 { 824 rval |= bmi2 825 } 826 } 827 if ebx&(1<<4) != 0 { 828 rval |= hle 829 } 830 if ebx&(1<<9) != 0 { 831 rval |= erms 832 } 833 if ebx&(1<<11) != 0 { 834 rval |= rtm 835 } 836 if ebx&(1<<14) != 0 { 837 rval |= mpx 838 } 839 if ebx&(1<<18) != 0 { 840 rval |= rdseed 841 } 842 if ebx&(1<<19) != 0 { 843 rval |= adx 844 } 845 if ebx&(1<<29) != 0 { 846 rval |= sha 847 } 848 849 // Only detect AVX-512 features if XGETBV is supported 850 if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) { 851 // Check for OS support 852 eax, _ := xgetbv(0) 853 854 // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and 855 // ZMM16-ZMM31 state are enabled by OS) 856 /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS). 857 if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 { 858 if ebx&(1<<16) != 0 { 859 rval |= avx512f 860 } 861 if ebx&(1<<17) != 0 { 862 rval |= avx512dq 863 } 864 if ebx&(1<<21) != 0 { 865 rval |= avx512ifma 866 } 867 if ebx&(1<<26) != 0 { 868 rval |= avx512pf 869 } 870 if ebx&(1<<27) != 0 { 871 rval |= avx512er 872 } 873 if ebx&(1<<28) != 0 { 874 rval |= avx512cd 875 } 876 if ebx&(1<<30) != 0 { 877 rval |= avx512bw 878 } 879 if ebx&(1<<31) != 0 { 880 rval |= avx512vl 881 } 882 // ecx 883 if ecx&(1<<1) != 0 { 884 rval |= avx512vbmi 885 } 886 } 887 } 888 } 889 890 if maxExtendedFunction() >= 0x80000001 { 891 _, _, c, d := cpuid(0x80000001) 892 if (c & (1 << 5)) != 0 { 893 rval |= lzcnt 894 rval |= popcnt 895 } 896 if (d & (1 << 31)) != 0 { 897 rval |= amd3dnow 898 } 899 if (d & (1 << 30)) != 0 { 900 rval |= amd3dnowext 901 } 902 if (d & (1 << 23)) != 0 { 903 rval |= mmx 904 } 905 if (d & (1 << 22)) != 0 { 906 rval |= mmxext 907 } 908 if (c & (1 << 6)) != 0 { 909 rval |= sse4a 910 } 911 if d&(1<<20) != 0 { 912 rval |= nx 913 } 914 if d&(1<<27) != 0 { 915 rval |= rdtscp 916 } 917 918 /* Allow for selectively disabling SSE2 functions on AMD processors 919 with SSE2 support but not SSE4a. This includes Athlon64, some 920 Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster 921 than SSE2 often enough to utilize this special-case flag. 922 AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case 923 so that SSE2 is used unless explicitly disabled by checking 924 AV_CPU_FLAG_SSE2SLOW. */ 925 if vendorID() != intel && 926 rval&sse2 != 0 && (c&0x00000040) == 0 { 927 rval |= sse2slow 928 } 929 930 /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be 931 * used unless the OS has AVX support. */ 932 if (rval & avx) != 0 { 933 if (c & 0x00000800) != 0 { 934 rval |= xop 935 } 936 if (c & 0x00010000) != 0 { 937 rval |= fma4 938 } 939 } 940 941 if vendorID() == intel { 942 family, model := familyModel() 943 if family == 6 && (model == 9 || model == 13 || model == 14) { 944 /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and 945 * 6/14 (core1 "yonah") theoretically support sse2, but it's 946 * usually slower than mmx. */ 947 if (rval & sse2) != 0 { 948 rval |= sse2slow 949 } 950 if (rval & sse3) != 0 { 951 rval |= sse3slow 952 } 953 } 954 /* The Atom processor has SSSE3 support, which is useful in many cases, 955 * but sometimes the SSSE3 version is slower than the SSE2 equivalent 956 * on the Atom, but is generally faster on other processors supporting 957 * SSSE3. This flag allows for selectively disabling certain SSSE3 958 * functions on the Atom. */ 959 if family == 6 && model == 28 { 960 rval |= atom 961 } 962 } 963 } 964 return flags(rval) 965 } 966 967 func valAsString(values ...uint32) []byte { 968 r := make([]byte, 4*len(values)) 969 for i, v := range values { 970 dst := r[i*4:] 971 dst[0] = byte(v & 0xff) 972 dst[1] = byte((v >> 8) & 0xff) 973 dst[2] = byte((v >> 16) & 0xff) 974 dst[3] = byte((v >> 24) & 0xff) 975 switch { 976 case dst[0] == 0: 977 return r[:i*4] 978 case dst[1] == 0: 979 return r[:i*4+1] 980 case dst[2] == 0: 981 return r[:i*4+2] 982 case dst[3] == 0: 983 return r[:i*4+3] 984 } 985 } 986 return r 987 }