github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/fat/fat.c (about) 1 /* x86_64 fat binary initializers. 2 3 Contributed to the GNU project by Kevin Ryde (original x86_32 code) and 4 Torbjorn Granlund (port to x86_64) 5 6 THE FUNCTIONS AND VARIABLES IN THIS FILE ARE FOR INTERNAL USE ONLY. 7 THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR 8 COMPLETELY IN FUTURE GNU MP RELEASES. 9 10 Copyright 2003, 2004, 2009, 2011-2015 Free Software Foundation, Inc. 11 12 This file is part of the GNU MP Library. 13 14 The GNU MP Library is free software; you can redistribute it and/or modify 15 it under the terms of either: 16 17 * the GNU Lesser General Public License as published by the Free 18 Software Foundation; either version 3 of the License, or (at your 19 option) any later version. 20 21 or 22 23 * the GNU General Public License as published by the Free Software 24 Foundation; either version 2 of the License, or (at your option) any 25 later version. 26 27 or both in parallel, as here. 28 29 The GNU MP Library is distributed in the hope that it will be useful, but 30 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 31 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 32 for more details. 33 34 You should have received copies of the GNU General Public License and the 35 GNU Lesser General Public License along with the GNU MP Library. If not, 36 see https://www.gnu.org/licenses/. */ 37 38 #include <stdio.h> /* for printf */ 39 #include <stdlib.h> /* for getenv */ 40 #include <string.h> 41 42 #include "gmp.h" 43 #include "gmp-impl.h" 44 45 /* Change this to "#define TRACE(x) x" for some traces. */ 46 #define TRACE(x) 47 48 49 /* fat_entry.asm */ 50 long __gmpn_cpuid (char [12], int); 51 52 53 #if WANT_FAKE_CPUID 54 /* The "name"s in the table are values for the GMP_CPU_TYPE environment 55 variable. Anything can be used, but for now it's the canonical cpu types 56 as per config.guess/config.sub. */ 57 58 #define __gmpn_cpuid fake_cpuid 59 60 #define MAKE_FMS(family, model) \ 61 ((((family) & 0xf) << 8) + (((family) & 0xff0) << 20) \ 62 + (((model) & 0xf) << 4) + (((model) & 0xf0) << 12)) 63 64 static struct { 65 const char *name; 66 const char *vendor; 67 unsigned fms; 68 } fake_cpuid_table[] = { 69 { "core2", "GenuineIntel", MAKE_FMS (6, 0xf) }, 70 { "nehalem", "GenuineIntel", MAKE_FMS (6, 0x1a) }, 71 { "nhm", "GenuineIntel", MAKE_FMS (6, 0x1a) }, 72 { "atom", "GenuineIntel", MAKE_FMS (6, 0x1c) }, 73 { "westmere", "GenuineIntel", MAKE_FMS (6, 0x25) }, 74 { "wsm", "GenuineIntel", MAKE_FMS (6, 0x25) }, 75 { "sandybridge","GenuineIntel", MAKE_FMS (6, 0x2a) }, 76 { "sbr", "GenuineIntel", MAKE_FMS (6, 0x2a) }, 77 { "silvermont", "GenuineIntel", MAKE_FMS (6, 0x37) }, 78 { "slm", "GenuineIntel", MAKE_FMS (6, 0x37) }, 79 { "haswell", "GenuineIntel", MAKE_FMS (6, 0x3c) }, 80 { "hwl", "GenuineIntel", MAKE_FMS (6, 0x3c) }, 81 { "broadwell", "GenuineIntel", MAKE_FMS (6, 0x3d) }, 82 { "bwl", "GenuineIntel", MAKE_FMS (6, 0x3d) }, 83 { "skylake", "GenuineIntel", MAKE_FMS (6, 0x5e) }, 84 { "sky", "GenuineIntel", MAKE_FMS (6, 0x5e) }, 85 { "pentium4", "GenuineIntel", MAKE_FMS (15, 3) }, 86 87 { "k8", "AuthenticAMD", MAKE_FMS (15, 0) }, 88 { "k10", "AuthenticAMD", MAKE_FMS (16, 0) }, 89 { "bobcat", "AuthenticAMD", MAKE_FMS (20, 1) }, 90 { "bulldozer", "AuthenticAMD", MAKE_FMS (21, 1) }, 91 { "piledriver", "AuthenticAMD", MAKE_FMS (21, 2) }, 92 { "steamroller","AuthenticAMD", MAKE_FMS (21, 0x30) }, 93 { "excavator", "AuthenticAMD", MAKE_FMS (21, 0x60) }, 94 { "jaguar", "AuthenticAMD", MAKE_FMS (22, 1) }, 95 96 { "nano", "CentaurHauls", MAKE_FMS (6, 15) }, 97 }; 98 99 static int 100 fake_cpuid_lookup (void) 101 { 102 char *s; 103 int i; 104 105 s = getenv ("GMP_CPU_TYPE"); 106 if (s == NULL) 107 { 108 printf ("Need GMP_CPU_TYPE environment variable for fake cpuid\n"); 109 abort (); 110 } 111 112 for (i = 0; i < numberof (fake_cpuid_table); i++) 113 if (strcmp (s, fake_cpuid_table[i].name) == 0) 114 return i; 115 116 printf ("GMP_CPU_TYPE=%s unknown\n", s); 117 abort (); 118 } 119 120 static long 121 fake_cpuid (char dst[12], unsigned int id) 122 { 123 int i = fake_cpuid_lookup(); 124 125 switch (id) { 126 case 0: 127 memcpy (dst, fake_cpuid_table[i].vendor, 12); 128 return 0; 129 case 1: 130 return fake_cpuid_table[i].fms; 131 case 7: 132 dst[0] = 0xff; /* BMI1, AVX2, etc */ 133 dst[1] = 0xff; /* BMI2, etc */ 134 return 0; 135 case 0x80000001: 136 dst[4 + 29 / 8] = (1 << (29 % 8)); /* "long" mode */ 137 return 0; 138 default: 139 printf ("fake_cpuid(): oops, unknown id %d\n", id); 140 abort (); 141 } 142 } 143 #endif 144 145 146 typedef DECL_preinv_divrem_1 ((*preinv_divrem_1_t)); 147 typedef DECL_preinv_mod_1 ((*preinv_mod_1_t)); 148 149 struct cpuvec_t __gmpn_cpuvec = { 150 __MPN(add_n_init), 151 __MPN(addlsh1_n_init), 152 __MPN(addlsh2_n_init), 153 __MPN(addmul_1_init), 154 __MPN(addmul_2_init), 155 __MPN(bdiv_dbm1c_init), 156 __MPN(cnd_add_n_init), 157 __MPN(cnd_sub_n_init), 158 __MPN(com_init), 159 __MPN(copyd_init), 160 __MPN(copyi_init), 161 __MPN(divexact_1_init), 162 __MPN(divrem_1_init), 163 __MPN(gcd_1_init), 164 __MPN(lshift_init), 165 __MPN(lshiftc_init), 166 __MPN(mod_1_init), 167 __MPN(mod_1_1p_init), 168 __MPN(mod_1_1p_cps_init), 169 __MPN(mod_1s_2p_init), 170 __MPN(mod_1s_2p_cps_init), 171 __MPN(mod_1s_4p_init), 172 __MPN(mod_1s_4p_cps_init), 173 __MPN(mod_34lsub1_init), 174 __MPN(modexact_1c_odd_init), 175 __MPN(mul_1_init), 176 __MPN(mul_basecase_init), 177 __MPN(mullo_basecase_init), 178 __MPN(preinv_divrem_1_init), 179 __MPN(preinv_mod_1_init), 180 __MPN(redc_1_init), 181 __MPN(redc_2_init), 182 __MPN(rshift_init), 183 __MPN(sqr_basecase_init), 184 __MPN(sub_n_init), 185 __MPN(sublsh1_n_init), 186 __MPN(submul_1_init), 187 0 188 }; 189 190 int __gmpn_cpuvec_initialized = 0; 191 192 /* The following setups start with generic x86, then overwrite with 193 specifics for a chip, and higher versions of that chip. 194 195 The arrangement of the setups here will normally be the same as the $path 196 selections in configure.in for the respective chips. 197 198 This code is reentrant and thread safe. We always calculate the same 199 decided_cpuvec, so if two copies of the code are running it doesn't 200 matter which completes first, both write the same to __gmpn_cpuvec. 201 202 We need to go via decided_cpuvec because if one thread has completed 203 __gmpn_cpuvec then it may be making use of the threshold values in that 204 vector. If another thread is still running __gmpn_cpuvec_init then we 205 don't want it to write different values to those fields since some of the 206 asm routines only operate correctly up to their own defined threshold, 207 not an arbitrary value. */ 208 209 static int 210 gmp_workaround_skylake_cpuid_bug () 211 { 212 char feature_string[49]; 213 char processor_name_string[49]; 214 static const char *bad_cpus[] = {" G44", " G45", " G39" /* , "6600" */ }; 215 int i; 216 217 /* Example strings: */ 218 /* "Intel(R) Pentium(R) CPU G4400 @ 3.30GHz" */ 219 /* "Intel(R) Core(TM) i5-6600K CPU @ 3.50GHz" */ 220 /* ^ ^ ^ */ 221 /* 0x80000002 0x80000003 0x80000004 */ 222 /* We match out just the 0x80000003 part here. */ 223 224 /* In their infinitive wisdom, Intel decided to use one register order for 225 the vendor string, and another for the processor name string. We shuffle 226 things about here, rather than write a new variant of our assembly cpuid. 227 */ 228 229 unsigned int eax, ebx, ecx, edx; 230 eax = __gmpn_cpuid (feature_string, 0x80000003); 231 ebx = ((unsigned int *)feature_string)[0]; 232 edx = ((unsigned int *)feature_string)[1]; 233 ecx = ((unsigned int *)feature_string)[2]; 234 235 ((unsigned int *) (processor_name_string))[0] = eax; 236 ((unsigned int *) (processor_name_string))[1] = ebx; 237 ((unsigned int *) (processor_name_string))[2] = ecx; 238 ((unsigned int *) (processor_name_string))[3] = edx; 239 240 processor_name_string[16] = 0; 241 242 for (i = 0; i < sizeof (bad_cpus) / sizeof (char *); i++) 243 { 244 if (strstr (processor_name_string, bad_cpus[i]) != 0) 245 return 1; 246 } 247 return 0; 248 } 249 250 enum {BMI2_BIT = 8}; 251 252 void 253 __gmpn_cpuvec_init (void) 254 { 255 struct cpuvec_t decided_cpuvec; 256 char vendor_string[13]; 257 char dummy_string[12]; 258 long fms; 259 int family, model; 260 261 TRACE (printf ("__gmpn_cpuvec_init:\n")); 262 263 memset (&decided_cpuvec, '\0', sizeof (decided_cpuvec)); 264 265 CPUVEC_SETUP_x86_64; 266 CPUVEC_SETUP_fat; 267 268 __gmpn_cpuid (vendor_string, 0); 269 vendor_string[12] = 0; 270 271 fms = __gmpn_cpuid (dummy_string, 1); 272 family = ((fms >> 8) & 0xf) + ((fms >> 20) & 0xff); 273 model = ((fms >> 4) & 0xf) + ((fms >> 12) & 0xf0); 274 275 /* Check extended feature flags */ 276 __gmpn_cpuid (dummy_string, 0x80000001); 277 if ((dummy_string[4 + 29 / 8] & (1 << (29 % 8))) == 0) 278 abort (); /* longmode-capable-bit turned off! */ 279 280 /*********************************************************/ 281 /*** WARNING: keep this list in sync with config.guess ***/ 282 /*********************************************************/ 283 if (strcmp (vendor_string, "GenuineIntel") == 0) 284 { 285 switch (family) 286 { 287 case 6: 288 switch (model) 289 { 290 case 0x0f: /* Conroe Merom Kentsfield Allendale */ 291 case 0x10: 292 case 0x11: 293 case 0x12: 294 case 0x13: 295 case 0x14: 296 case 0x15: 297 case 0x16: 298 case 0x17: /* PNR Wolfdale Yorkfield */ 299 case 0x18: 300 case 0x19: 301 case 0x1d: /* PNR Dunnington */ 302 CPUVEC_SETUP_core2; 303 break; 304 305 case 0x1c: /* Atom Silverthorne */ 306 case 0x26: /* Atom Lincroft */ 307 case 0x27: /* Atom Saltwell? */ 308 case 0x36: /* Atom Cedarview/Saltwell */ 309 CPUVEC_SETUP_atom; 310 break; 311 312 case 0x1a: /* NHM Gainestown */ 313 case 0x1b: 314 case 0x1e: /* NHM Lynnfield/Jasper */ 315 case 0x1f: 316 case 0x20: 317 case 0x21: 318 case 0x22: 319 case 0x23: 320 case 0x24: 321 case 0x25: /* WSM Clarkdale/Arrandale */ 322 case 0x28: 323 case 0x29: 324 case 0x2b: 325 case 0x2c: /* WSM Gulftown */ 326 case 0x2e: /* NHM Beckton */ 327 case 0x2f: /* WSM Eagleton */ 328 case 0x37: /* Silvermont */ 329 case 0x4a: /* Silvermont */ 330 case 0x4c: /* Airmont */ 331 case 0x4d: /* Silvermont/Avoton */ 332 case 0x5a: /* Silvermont */ 333 case 0x5c: /* Goldmont */ 334 case 0x5f: /* Goldmont */ 335 CPUVEC_SETUP_core2; 336 CPUVEC_SETUP_coreinhm; 337 break; 338 339 case 0x2a: /* SB */ 340 case 0x2d: /* SBC-EP */ 341 case 0x3a: /* IBR */ 342 case 0x3e: /* IBR Ivytown */ 343 CPUVEC_SETUP_core2; 344 CPUVEC_SETUP_coreinhm; 345 CPUVEC_SETUP_coreisbr; 346 break; 347 case 0x3c: /* Haswell client */ 348 case 0x3f: /* Haswell server */ 349 case 0x45: /* Haswell ULT */ 350 case 0x46: /* Crystal Well */ 351 CPUVEC_SETUP_core2; 352 CPUVEC_SETUP_coreinhm; 353 CPUVEC_SETUP_coreisbr; 354 /* Some Haswells lack BMI2. Let them appear as Sandybridges for 355 now. */ 356 __gmpn_cpuid (dummy_string, 7); 357 if ((dummy_string[0 + BMI2_BIT / 8] & (1 << (BMI2_BIT % 8))) == 0) 358 break; 359 CPUVEC_SETUP_coreihwl; 360 break; 361 case 0x3d: /* Broadwell */ 362 case 0x47: /* Broadwell */ 363 case 0x4f: /* Broadwell server */ 364 case 0x56: /* Broadwell microserver */ 365 CPUVEC_SETUP_core2; 366 CPUVEC_SETUP_coreinhm; 367 CPUVEC_SETUP_coreisbr; 368 if ((dummy_string[0 + BMI2_BIT / 8] & (1 << (BMI2_BIT % 8))) == 0) 369 break; 370 CPUVEC_SETUP_coreihwl; 371 CPUVEC_SETUP_coreibwl; 372 break; 373 case 0x4e: /* Skylake client */ 374 case 0x55: /* Skylake server */ 375 case 0x5e: /* Skylake */ 376 case 0x8e: /* Kabylake */ 377 case 0x9e: /* Kabylake */ 378 CPUVEC_SETUP_core2; 379 CPUVEC_SETUP_coreinhm; 380 CPUVEC_SETUP_coreisbr; 381 if ((dummy_string[0 + BMI2_BIT / 8] & (1 << (BMI2_BIT % 8))) == 0) 382 break; 383 if (gmp_workaround_skylake_cpuid_bug ()) 384 break; 385 CPUVEC_SETUP_coreihwl; 386 CPUVEC_SETUP_coreibwl; 387 CPUVEC_SETUP_skylake; 388 break; 389 } 390 break; 391 392 case 15: 393 CPUVEC_SETUP_pentium4; 394 break; 395 } 396 } 397 else if (strcmp (vendor_string, "AuthenticAMD") == 0) 398 { 399 switch (family) 400 { 401 case 0x0f: /* k8 */ 402 case 0x11: /* "fam 11h", mix of k8 and k10 */ 403 case 0x13: 404 case 0x17: 405 CPUVEC_SETUP_k8; 406 break; 407 408 case 0x10: /* k10 */ 409 case 0x12: /* k10 (llano) */ 410 CPUVEC_SETUP_k8; 411 CPUVEC_SETUP_k10; 412 break; 413 414 case 0x14: /* bobcat */ 415 case 0x16: /* jaguar */ 416 CPUVEC_SETUP_k8; 417 CPUVEC_SETUP_k10; 418 CPUVEC_SETUP_bobcat; 419 break; 420 421 case 0x15: /* bulldozer, piledriver, steamroller, excavator */ 422 CPUVEC_SETUP_k8; 423 CPUVEC_SETUP_k10; 424 CPUVEC_SETUP_bd1; 425 } 426 } 427 else if (strcmp (vendor_string, "CentaurHauls") == 0) 428 { 429 switch (family) 430 { 431 case 6: 432 if (model >= 15) 433 CPUVEC_SETUP_nano; 434 break; 435 } 436 } 437 438 /* There's no x86 generic mpn_preinv_divrem_1 or mpn_preinv_mod_1. 439 Instead default to the plain versions from whichever CPU we detected. 440 The function arguments are compatible, no need for any glue code. */ 441 if (decided_cpuvec.preinv_divrem_1 == NULL) 442 decided_cpuvec.preinv_divrem_1 =(preinv_divrem_1_t)decided_cpuvec.divrem_1; 443 if (decided_cpuvec.preinv_mod_1 == NULL) 444 decided_cpuvec.preinv_mod_1 =(preinv_mod_1_t) decided_cpuvec.mod_1; 445 446 ASSERT_CPUVEC (decided_cpuvec); 447 CPUVEC_INSTALL (decided_cpuvec); 448 449 /* Set this once the threshold fields are ready. 450 Use volatile to prevent it getting moved. */ 451 *((volatile int *) &__gmpn_cpuvec_initialized) = 1; 452 }