github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/fat/fat.c (about)

     1  /* x86_64 fat binary initializers.
     2  
     3     Contributed to the GNU project by Kevin Ryde (original x86_32 code) and
     4     Torbjorn Granlund (port to x86_64)
     5  
     6     THE FUNCTIONS AND VARIABLES IN THIS FILE ARE FOR INTERNAL USE ONLY.
     7     THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
     8     COMPLETELY IN FUTURE GNU MP RELEASES.
     9  
    10  Copyright 2003, 2004, 2009, 2011-2015 Free Software Foundation, Inc.
    11  
    12  This file is part of the GNU MP Library.
    13  
    14  The GNU MP Library is free software; you can redistribute it and/or modify
    15  it under the terms of either:
    16  
    17    * the GNU Lesser General Public License as published by the Free
    18      Software Foundation; either version 3 of the License, or (at your
    19      option) any later version.
    20  
    21  or
    22  
    23    * the GNU General Public License as published by the Free Software
    24      Foundation; either version 2 of the License, or (at your option) any
    25      later version.
    26  
    27  or both in parallel, as here.
    28  
    29  The GNU MP Library is distributed in the hope that it will be useful, but
    30  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    31  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    32  for more details.
    33  
    34  You should have received copies of the GNU General Public License and the
    35  GNU Lesser General Public License along with the GNU MP Library.  If not,
    36  see https://www.gnu.org/licenses/.  */
    37  
    38  #include <stdio.h>    /* for printf */
    39  #include <stdlib.h>   /* for getenv */
    40  #include <string.h>
    41  
    42  #include "gmp.h"
    43  #include "gmp-impl.h"
    44  
    45  /* Change this to "#define TRACE(x) x" for some traces. */
    46  #define TRACE(x)
    47  
    48  
    49  /* fat_entry.asm
          Low-level cpuid query.  The second argument is the cpuid id to ask for.
          Callers in this file treat the return value as EAX and the 12 bytes
          stored through the first argument as EBX, EDX, ECX in that order (see
          gmp_workaround_skylake_cpuid_bug); for id 0 those 12 bytes are used
          directly as the vendor string.  */
    50  long __gmpn_cpuid (char [12], int);
    51  
    52  
    53  #if WANT_FAKE_CPUID
    54  /* The "name"s in the table are values for the GMP_CPU_TYPE environment
    55     variable.  Anything can be used, but for now it's the canonical cpu types
    56     as per config.guess/config.sub.  */
    57  
          /* From this point on, every use of __gmpn_cpuid in this file is rewritten
             by the preprocessor into a call to the table-driven fake_cpuid defined
             below.  The prototype above is unaffected because it precedes this
             #define.  */
    58  #define __gmpn_cpuid            fake_cpuid
    59  
    60  #define MAKE_FMS(family, model)						\
    61    ((((family) & 0xf) << 8) + (((family) & 0xff0) << 20)			\
    62     + (((model) & 0xf) << 4) + (((model)  &  0xf0) << 12))
    63  
    64  static struct {
    65    const char  *name;		/* compared against GMP_CPU_TYPE by fake_cpuid_lookup */
    66    const char  *vendor;		/* its first 12 bytes are what fake_cpuid stores for id 0 */
    67    unsigned    fms;		/* what fake_cpuid returns for id 1; built with MAKE_FMS */
    68  } fake_cpuid_table[] = {
          /* Several CPUs appear twice, under a long and a short name, with the
             same vendor and fms.  */
    69    { "core2",      "GenuineIntel", MAKE_FMS (6, 0xf) },
    70    { "nehalem",    "GenuineIntel", MAKE_FMS (6, 0x1a) },
    71    { "nhm",        "GenuineIntel", MAKE_FMS (6, 0x1a) },
    72    { "atom",       "GenuineIntel", MAKE_FMS (6, 0x1c) },
    73    { "westmere",   "GenuineIntel", MAKE_FMS (6, 0x25) },
    74    { "wsm",        "GenuineIntel", MAKE_FMS (6, 0x25) },
    75    { "sandybridge","GenuineIntel", MAKE_FMS (6, 0x2a) },
    76    { "sbr",        "GenuineIntel", MAKE_FMS (6, 0x2a) },
    77    { "silvermont", "GenuineIntel", MAKE_FMS (6, 0x37) },
    78    { "slm",        "GenuineIntel", MAKE_FMS (6, 0x37) },
    79    { "haswell",    "GenuineIntel", MAKE_FMS (6, 0x3c) },
    80    { "hwl",        "GenuineIntel", MAKE_FMS (6, 0x3c) },
    81    { "broadwell",  "GenuineIntel", MAKE_FMS (6, 0x3d) },
    82    { "bwl",        "GenuineIntel", MAKE_FMS (6, 0x3d) },
    83    { "skylake",    "GenuineIntel", MAKE_FMS (6, 0x5e) },
    84    { "sky",        "GenuineIntel", MAKE_FMS (6, 0x5e) },
    85    { "pentium4",   "GenuineIntel", MAKE_FMS (15, 3) },
    86  
    87    { "k8",         "AuthenticAMD", MAKE_FMS (15, 0) },
    88    { "k10",        "AuthenticAMD", MAKE_FMS (16, 0) },
    89    { "bobcat",     "AuthenticAMD", MAKE_FMS (20, 1) },
    90    { "bulldozer",  "AuthenticAMD", MAKE_FMS (21, 1) },
    91    { "piledriver", "AuthenticAMD", MAKE_FMS (21, 2) },
    92    { "steamroller","AuthenticAMD", MAKE_FMS (21, 0x30) },
    93    { "excavator",  "AuthenticAMD", MAKE_FMS (21, 0x60) },
    94    { "jaguar",     "AuthenticAMD", MAKE_FMS (22, 1) },
    95  
    96    { "nano",       "CentaurHauls", MAKE_FMS (6, 15) },
    97  };
    98  
    99  static int
   100  fake_cpuid_lookup (void)
   101  {
   102    char  *s;
   103    int   i;
   104  
   105    s = getenv ("GMP_CPU_TYPE");
   106    if (s == NULL)
   107      {
   108        printf ("Need GMP_CPU_TYPE environment variable for fake cpuid\n");
   109        abort ();
   110      }
   111  
   112    for (i = 0; i < numberof (fake_cpuid_table); i++)
   113      if (strcmp (s, fake_cpuid_table[i].name) == 0)
   114        return i;
   115  
   116    printf ("GMP_CPU_TYPE=%s unknown\n", s);
   117    abort ();
   118  }
   119  
   120  static long
   121  fake_cpuid (char dst[12], unsigned int id)
   122  {
   123    int  i = fake_cpuid_lookup();
   124  
   125    switch (id) {
   126    case 0:
   127      memcpy (dst, fake_cpuid_table[i].vendor, 12);
   128      return 0;
   129    case 1:
   130      return fake_cpuid_table[i].fms;
   131    case 7:
   132      dst[0] = 0xff;				/* BMI1, AVX2, etc */
   133      dst[1] = 0xff;				/* BMI2, etc */
   134      return 0;
   135    case 0x80000001:
   136      dst[4 + 29 / 8] = (1 << (29 % 8));		/* "long" mode */
   137      return 0;
   138    default:
   139      printf ("fake_cpuid(): oops, unknown id %d\n", id);
   140      abort ();
   141    }
   142  }
   143  #endif
   144  
   145  
   146  typedef DECL_preinv_divrem_1 ((*preinv_divrem_1_t));	/* pointer type; __gmpn_cpuvec_init casts divrem_1 to it */
   147  typedef DECL_preinv_mod_1    ((*preinv_mod_1_t));	/* pointer type; __gmpn_cpuvec_init casts mod_1 to it */
   148  
   149  struct cpuvec_t __gmpn_cpuvec = {
          /* Start-up contents of the dispatch vector: each function member is
             set to the matching __MPN(..._init) symbol.  Those symbols are
             defined outside this file; presumably they are stubs that call
             __gmpn_cpuvec_init on first use -- TODO confirm.
             __gmpn_cpuvec_init later replaces the vector via CPUVEC_INSTALL.
             The initializers are positional, so their order must match the
             member order of struct cpuvec_t.  */
   150    __MPN(add_n_init),
   151    __MPN(addlsh1_n_init),
   152    __MPN(addlsh2_n_init),
   153    __MPN(addmul_1_init),
   154    __MPN(addmul_2_init),
   155    __MPN(bdiv_dbm1c_init),
   156    __MPN(cnd_add_n_init),
   157    __MPN(cnd_sub_n_init),
   158    __MPN(com_init),
   159    __MPN(copyd_init),
   160    __MPN(copyi_init),
   161    __MPN(divexact_1_init),
   162    __MPN(divrem_1_init),
   163    __MPN(gcd_1_init),
   164    __MPN(lshift_init),
   165    __MPN(lshiftc_init),
   166    __MPN(mod_1_init),
   167    __MPN(mod_1_1p_init),
   168    __MPN(mod_1_1p_cps_init),
   169    __MPN(mod_1s_2p_init),
   170    __MPN(mod_1s_2p_cps_init),
   171    __MPN(mod_1s_4p_init),
   172    __MPN(mod_1s_4p_cps_init),
   173    __MPN(mod_34lsub1_init),
   174    __MPN(modexact_1c_odd_init),
   175    __MPN(mul_1_init),
   176    __MPN(mul_basecase_init),
   177    __MPN(mullo_basecase_init),
   178    __MPN(preinv_divrem_1_init),
   179    __MPN(preinv_mod_1_init),
   180    __MPN(redc_1_init),
   181    __MPN(redc_2_init),
   182    __MPN(rshift_init),
   183    __MPN(sqr_basecase_init),
   184    __MPN(sub_n_init),
   185    __MPN(sublsh1_n_init),
   186    __MPN(submul_1_init),
   187    0	/* member after submul_1; any members following it are zero-initialized by C rules */
   188  };
   189  
   190  int __gmpn_cpuvec_initialized = 0;	/* set to 1 as the final step of __gmpn_cpuvec_init */
   191  
   192  /* The following setups start with generic x86, then overwrite with
   193     specifics for a chip, and higher versions of that chip.
   194  
   195     The arrangement of the setups here will normally be the same as the $path
   196     selections in configure.in for the respective chips.
   197  
   198     This code is reentrant and thread safe.  We always calculate the same
   199     decided_cpuvec, so if two copies of the code are running it doesn't
   200     matter which completes first, both write the same to __gmpn_cpuvec.
   201  
   202     We need to go via decided_cpuvec because if one thread has completed
   203     __gmpn_cpuvec then it may be making use of the threshold values in that
   204     vector.  If another thread is still running __gmpn_cpuvec_init then we
   205     don't want it to write different values to those fields since some of the
   206     asm routines only operate correctly up to their own defined threshold,
   207     not an arbitrary value.  */
   208  
   209  static int
   210  gmp_workaround_skylake_cpuid_bug ()
   211  {
   212    char feature_string[49];
   213    char processor_name_string[49];
   214    static const char *bad_cpus[] = {" G44", " G45", " G39" /* , "6600" */ };
   215    int i;
   216  
   217    /* Example strings:                                   */
   218    /* "Intel(R) Pentium(R) CPU G4400 @ 3.30GHz"          */
   219    /* "Intel(R) Core(TM) i5-6600K CPU @ 3.50GHz"         */
   220    /*                  ^               ^               ^ */
   221    /*     0x80000002       0x80000003      0x80000004    */
   222    /* We match out just the 0x80000003 part here. */
   223  
   224    /* In their infinitive wisdom, Intel decided to use one register order for
   225       the vendor string, and another for the processor name string.  We shuffle
   226       things about here, rather than write a new variant of our assembly cpuid.
   227    */
   228  
   229    unsigned int eax, ebx, ecx, edx;
   230    eax = __gmpn_cpuid (feature_string, 0x80000003);
   231    ebx = ((unsigned int *)feature_string)[0];
   232    edx = ((unsigned int *)feature_string)[1];
   233    ecx = ((unsigned int *)feature_string)[2];
   234  
   235    ((unsigned int *) (processor_name_string))[0] = eax;
   236    ((unsigned int *) (processor_name_string))[1] = ebx;
   237    ((unsigned int *) (processor_name_string))[2] = ecx;
   238    ((unsigned int *) (processor_name_string))[3] = edx;
   239  
   240    processor_name_string[16] = 0;
   241  
   242    for (i = 0; i < sizeof (bad_cpus) / sizeof (char *); i++)
   243      {
   244        if (strstr (processor_name_string, bad_cpus[i]) != 0)
   245  	return 1;
   246      }
   247    return 0;
   248  }
   249  
   250  enum {BMI2_BIT = 8};
   251  
   252  void
   253  __gmpn_cpuvec_init (void)
   254  {
   255    struct cpuvec_t  decided_cpuvec;
   256    char vendor_string[13];
   257    char dummy_string[12];
   258    long fms;
   259    int family, model;
   260  
   261    TRACE (printf ("__gmpn_cpuvec_init:\n"));
   262  
   263    memset (&decided_cpuvec, '\0', sizeof (decided_cpuvec));
   264  
   265    CPUVEC_SETUP_x86_64;
   266    CPUVEC_SETUP_fat;
   267  
   268    __gmpn_cpuid (vendor_string, 0);
   269    vendor_string[12] = 0;
   270  
   271    fms = __gmpn_cpuid (dummy_string, 1);
   272    family = ((fms >> 8) & 0xf) + ((fms >> 20) & 0xff);
   273    model = ((fms >> 4) & 0xf) + ((fms >> 12) & 0xf0);
   274  
   275    /* Check extended feature flags */
   276    __gmpn_cpuid (dummy_string, 0x80000001);
   277    if ((dummy_string[4 + 29 / 8] & (1 << (29 % 8))) == 0)
   278      abort (); /* longmode-capable-bit turned off! */
   279  
   280    /*********************************************************/
   281    /*** WARNING: keep this list in sync with config.guess ***/
   282    /*********************************************************/
   283    if (strcmp (vendor_string, "GenuineIntel") == 0)
   284      {
   285        switch (family)
   286  	{
   287  	case 6:
   288  	  switch (model)
   289  	    {
   290  	    case 0x0f:		/* Conroe Merom Kentsfield Allendale */
   291  	    case 0x10:
   292  	    case 0x11:
   293  	    case 0x12:
   294  	    case 0x13:
   295  	    case 0x14:
   296  	    case 0x15:
   297  	    case 0x16:
   298  	    case 0x17:		/* PNR Wolfdale Yorkfield */
   299  	    case 0x18:
   300  	    case 0x19:
   301  	    case 0x1d:		/* PNR Dunnington */
   302  	      CPUVEC_SETUP_core2;
   303  	      break;
   304  
   305  	    case 0x1c:		/* Atom Silverthorne */
   306  	    case 0x26:		/* Atom Lincroft */
   307  	    case 0x27:		/* Atom Saltwell? */
   308  	    case 0x36:		/* Atom Cedarview/Saltwell */
   309  	      CPUVEC_SETUP_atom;
   310  	      break;
   311  
   312  	    case 0x1a:		/* NHM Gainestown */
   313  	    case 0x1b:
   314  	    case 0x1e:		/* NHM Lynnfield/Jasper */
   315  	    case 0x1f:
   316  	    case 0x20:
   317  	    case 0x21:
   318  	    case 0x22:
   319  	    case 0x23:
   320  	    case 0x24:
   321  	    case 0x25:		/* WSM Clarkdale/Arrandale */
   322  	    case 0x28:
   323  	    case 0x29:
   324  	    case 0x2b:
   325  	    case 0x2c:		/* WSM Gulftown */
   326  	    case 0x2e:		/* NHM Beckton */
   327  	    case 0x2f:		/* WSM Eagleton */
   328  	    case 0x37:		/* Silvermont */
   329  	    case 0x4a:		/* Silvermont */
   330  	    case 0x4c:		/* Airmont */
   331  	    case 0x4d:		/* Silvermont/Avoton */
   332  	    case 0x5a:		/* Silvermont */
   333  	    case 0x5c:		/* Goldmont */
   334  	    case 0x5f:		/* Goldmont */
   335  	      CPUVEC_SETUP_core2;
   336  	      CPUVEC_SETUP_coreinhm;
   337  	      break;
   338  
   339  	    case 0x2a:		/* SB */
   340  	    case 0x2d:		/* SBC-EP */
   341  	    case 0x3a:		/* IBR */
   342  	    case 0x3e:		/* IBR Ivytown */
   343  	      CPUVEC_SETUP_core2;
   344  	      CPUVEC_SETUP_coreinhm;
   345  	      CPUVEC_SETUP_coreisbr;
   346  	      break;
   347  	    case 0x3c:		/* Haswell client */
   348  	    case 0x3f:		/* Haswell server */
   349  	    case 0x45:		/* Haswell ULT */
   350  	    case 0x46:		/* Crystal Well */
   351  	      CPUVEC_SETUP_core2;
   352  	      CPUVEC_SETUP_coreinhm;
   353  	      CPUVEC_SETUP_coreisbr;
   354  	      /* Some Haswells lack BMI2.  Let them appear as Sandybridges for
   355  		 now.  */
   356  	      __gmpn_cpuid (dummy_string, 7);
   357  	      if ((dummy_string[0 + BMI2_BIT / 8] & (1 << (BMI2_BIT % 8))) == 0)
   358  		break;
   359  	      CPUVEC_SETUP_coreihwl;
   360  	      break;
   361  	    case 0x3d:		/* Broadwell */
   362  	    case 0x47:		/* Broadwell */
   363  	    case 0x4f:		/* Broadwell server */
   364  	    case 0x56:		/* Broadwell microserver */
   365  	      CPUVEC_SETUP_core2;
   366  	      CPUVEC_SETUP_coreinhm;
   367  	      CPUVEC_SETUP_coreisbr;
   368  	      if ((dummy_string[0 + BMI2_BIT / 8] & (1 << (BMI2_BIT % 8))) == 0)
   369  		break;
   370  	      CPUVEC_SETUP_coreihwl;
   371  	      CPUVEC_SETUP_coreibwl;
   372  	      break;
   373  	    case 0x4e:		/* Skylake client */
   374  	    case 0x55:		/* Skylake server */
   375  	    case 0x5e:		/* Skylake */
   376  	    case 0x8e:		/* Kabylake */
   377  	    case 0x9e:		/* Kabylake */
   378  	      CPUVEC_SETUP_core2;
   379  	      CPUVEC_SETUP_coreinhm;
   380  	      CPUVEC_SETUP_coreisbr;
   381  	      if ((dummy_string[0 + BMI2_BIT / 8] & (1 << (BMI2_BIT % 8))) == 0)
   382  		break;
   383  	      if (gmp_workaround_skylake_cpuid_bug ())
   384  		break;
   385  	      CPUVEC_SETUP_coreihwl;
   386  	      CPUVEC_SETUP_coreibwl;
   387  	      CPUVEC_SETUP_skylake;
   388  	      break;
   389  	    }
   390  	  break;
   391  
   392  	case 15:
   393  	  CPUVEC_SETUP_pentium4;
   394  	  break;
   395  	}
   396      }
   397    else if (strcmp (vendor_string, "AuthenticAMD") == 0)
   398      {
   399        switch (family)
   400  	{
   401  	case 0x0f:		/* k8 */
   402  	case 0x11:		/* "fam 11h", mix of k8 and k10 */
   403  	case 0x13:
   404  	case 0x17:
   405  	  CPUVEC_SETUP_k8;
   406  	  break;
   407  
   408  	case 0x10:		/* k10 */
   409  	case 0x12:		/* k10 (llano) */
   410  	  CPUVEC_SETUP_k8;
   411  	  CPUVEC_SETUP_k10;
   412  	  break;
   413  
   414  	case 0x14:		/* bobcat */
   415  	case 0x16:		/* jaguar */
   416  	  CPUVEC_SETUP_k8;
   417  	  CPUVEC_SETUP_k10;
   418  	  CPUVEC_SETUP_bobcat;
   419  	  break;
   420  
   421  	case 0x15:	    /* bulldozer, piledriver, steamroller, excavator */
   422  	  CPUVEC_SETUP_k8;
   423  	  CPUVEC_SETUP_k10;
   424  	  CPUVEC_SETUP_bd1;
   425  	}
   426      }
   427    else if (strcmp (vendor_string, "CentaurHauls") == 0)
   428      {
   429        switch (family)
   430  	{
   431  	case 6:
   432  	  if (model >= 15)
   433  	    CPUVEC_SETUP_nano;
   434  	  break;
   435  	}
   436      }
   437  
   438    /* There's no x86 generic mpn_preinv_divrem_1 or mpn_preinv_mod_1.
   439       Instead default to the plain versions from whichever CPU we detected.
   440       The function arguments are compatible, no need for any glue code.  */
   441    if (decided_cpuvec.preinv_divrem_1 == NULL)
   442      decided_cpuvec.preinv_divrem_1 =(preinv_divrem_1_t)decided_cpuvec.divrem_1;
   443    if (decided_cpuvec.preinv_mod_1 == NULL)
   444      decided_cpuvec.preinv_mod_1    =(preinv_mod_1_t)   decided_cpuvec.mod_1;
   445  
   446    ASSERT_CPUVEC (decided_cpuvec);
   447    CPUVEC_INSTALL (decided_cpuvec);
   448  
   449    /* Set this once the threshold fields are ready.
   450       Use volatile to prevent it getting moved.  */
   451    *((volatile int *) &__gmpn_cpuvec_initialized) = 1;
   452  }