github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/fat/fat_entry.asm (about)

     1  dnl  x86 fat binary entrypoints.
     2  
     3  dnl  Contributed to the GNU project by Kevin Ryde (original x86_32 code) and
     4  dnl  Torbjorn Granlund (port to x86_64)
     5  
     6  dnl  Copyright 2003, 2009, 2011-2014, 2016 Free Software Foundation, Inc.
     7  
     8  dnl  This file is part of the GNU MP Library.
     9  dnl
    10  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
    11  dnl  it under the terms of either:
    12  dnl
    13  dnl    * the GNU Lesser General Public License as published by the Free
    14  dnl      Software Foundation; either version 3 of the License, or (at your
    15  dnl      option) any later version.
    16  dnl
    17  dnl  or
    18  dnl
    19  dnl    * the GNU General Public License as published by the Free Software
    20  dnl      Foundation; either version 2 of the License, or (at your option) any
    21  dnl      later version.
    22  dnl
    23  dnl  or both in parallel, as here.
    24  dnl
    25  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    26  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    27  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    28  dnl  for more details.
    29  dnl
    30  dnl  You should have received copies of the GNU General Public License and the
    31  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    32  dnl  see https://www.gnu.org/licenses/.
    33  
    34  include(`../config.m4')
    35  
    36  
    37  dnl  Forcibly disable profiling.
    38  dnl
    39  dnl  The entrypoints and inits are small enough not to worry about, the real
    40  dnl  routines arrived at will have any profiling.  Also, the way the code
    41  dnl  here ends with a jump means we won't work properly with the
    42  dnl  "instrument" profiling scheme anyway.
    43  
    44  define(`WANT_PROFILING',no)
    45  
    46  
    47  dnl  We define PRETEND_PIC as a helper symbol, then use it for suppressing
    48  dnl  normal, fast call code, since that triggers problems on Darwin, OpenBSD
    49  dnl  and some versions of GNU/Linux.  This will go away when symbol hiding is
    50  dnl  finished.
        dnl
        dnl  PRETEND_PIC ends up defined when any one of DARWIN, OPENBSD, LINUX or
        dnl  PIC is defined.  FAT_ENTRY and the shared fat_init code below test it
        dnl  to choose between an LEA-based jump and a direct absolute jump.
    51  
    52  ifdef(`DARWIN',
    53  `define(`PRETEND_PIC')')
    54  ifdef(`OPENBSD',
    55  `define(`PRETEND_PIC')')
    56  ifdef(`LINUX',
    57  `define(`PRETEND_PIC')')
    58  ifdef(`PIC',
    59  `define(`PRETEND_PIC')')
    60  
    61  ABI_SUPPORT(DOS64)
    62  ABI_SUPPORT(STD64)
    63  
    64  	TEXT
    65  
    66  dnl  Usage: FAT_ENTRY(name, offset)
    67  dnl
    68  dnl  Emit a fat binary entrypoint function of the given name.  This is the
    69  dnl  normal entry for applications, eg. __gmpn_add_n.
    70  dnl
    71  dnl  The code simply jumps through the function pointer in __gmpn_cpuvec at
    72  dnl  the given "offset" (in bytes).
    73  dnl
        dnl  One of three forms is emitted, chosen when the macro is expanded:
        dnl    HOST_DOS64   a single jmp through offset+__gmpn_cpuvec, %rip-relative
        dnl    PRETEND_PIC  the LEA macro puts the address of __gmpn_cpuvec in %rax,
        dnl                 then jmp through offset(%rax); %rax is not preserved
        dnl    otherwise    a single jmp through the absolute address
        dnl                 __gmpn_cpuvec+offset
        dnl
    74  dnl  For non-PIC, the jumps are 5 bytes each, aligning them to 8 should be
    75  dnl  fine for all x86s.
    76  dnl
    77  dnl  For ELF/DARWIN PIC, the jumps are 20 bytes each, and are best aligned to
    78  dnl  16 to ensure at least the first two instructions don't cross a cache line
    79  dnl  boundary.
    80  dnl
    81  dnl  For DOS64, the jumps are 6 bytes.  The same form works also for GNU/Linux
    82  dnl  (at least with certain assembler/linkers) but FreeBSD 8.2 crashes.  Not
    83  dnl  tested on Darwin, Slowaris, NetBSD, etc.
    84  dnl
    85  dnl  Note the extra `' ahead of PROLOGUE obscures it from the HAVE_NATIVE
    86  dnl  grepping in configure, stopping that code trying to eval something with
    87  dnl  $1 in it.
    88  
    89  define(FAT_ENTRY,
    90  m4_assert_numargs(2)
    91  `ifdef(`HOST_DOS64',
    92  `	ALIGN(8)
    93  `'PROLOGUE($1)
    94  	jmp	*$2+GSYM_PREFIX`'__gmpn_cpuvec(%rip)
    95  EPILOGUE()
    96  ',
    97  `	ALIGN(ifdef(`PIC',16,8))
    98  `'PROLOGUE($1)
    99  ifdef(`PRETEND_PIC',
   100  `	LEA(	GSYM_PREFIX`'__gmpn_cpuvec, %rax)
   101  	jmp	*$2(%rax)
   102  ',`dnl non-PIC
   103  	jmp	*GSYM_PREFIX`'__gmpn_cpuvec+$2
   104  ')
   105  EPILOGUE()
   106  ')')
   107  
   108  
   109  dnl  FAT_ENTRY for each CPUVEC_FUNCS_LIST
   110  dnl
        dnl  CPUVEC_offset starts at 0 and is advanced by 8 after each entry, so
        dnl  successive entries jump through successive 8-byte slots of
        dnl  __gmpn_cpuvec.  This presumably relies on CPUVEC_FUNCS_LIST being in
        dnl  the same order as the pointers in that table (defined elsewhere --
        dnl  confirm there).
   111  
   112  define(`CPUVEC_offset',0)
   113  foreach(i,
   114  `FAT_ENTRY(MPN(i),CPUVEC_offset)
   115  define(`CPUVEC_offset',eval(CPUVEC_offset + 8))',
   116  CPUVEC_FUNCS_LIST)
   117  
   118  
   119  dnl  Usage: FAT_INIT(name, offset)
   120  dnl
   121  dnl  Emit a fat binary initializer function of the given name.  These
   122  dnl  functions are the initial values for the pointers in __gmpn_cpuvec.
   123  dnl
   124  dnl  The code simply calls __gmpn_cpuvec_init, and then jumps back through
   125  dnl  the __gmpn_cpuvec pointer selected by the given "offset".  Unlike in
        dnl  FAT_ENTRY, "offset" here is a pointer index, not a byte count: the
        dnl  shared fat_init code scales it by 8 when it jumps.
   126  dnl  __gmpn_cpuvec_init will have stored the address of the selected
   127  dnl  implementation there.
   128  dnl
   129  dnl  Only one of these routines will be executed, and only once, since after
   130  dnl  that all the __gmpn_cpuvec pointers go to real routines.  So there's no
   131  dnl  need for anything special here, just something small and simple.  To
   132  dnl  keep code size down, "fat_init" is a shared bit of code, arrived at
   133  dnl  with the index in %al.  %al is used since the movb instruction is 2
   134  dnl  bytes where %eax would be 4.  Being 8 bits wide, %al can only
        dnl  distinguish 256 table entries.
   135  dnl
   136  dnl  Note having `PROLOGUE in FAT_INIT obscures that PROLOGUE from the
   137  dnl  HAVE_NATIVE grepping in configure, preventing that code trying to eval
   138  dnl  something with $1 in it.
   139  dnl
   140  dnl  We need to preserve parameter registers over the __gmpn_cpuvec_init call
   141  
   142  define(FAT_INIT,
   143  m4_assert_numargs(2)
   144  `PROLOGUE($1)
   145  	mov	$`'$2, %al
   146  	jmp	L(fat_init)
   147  EPILOGUE()
   148  ')
   149  
   150  dnl  FAT_INIT for each CPUVEC_FUNCS_LIST
   151  dnl
        dnl  CPUVEC_offset advances by 1 here, not by 8 as for the FAT_ENTRY list:
        dnl  the value is the index of the pointer, which each stub loads into %al
        dnl  and fat_init later multiplies by 8.
   152  
   153  define(`CPUVEC_offset',0)
   154  foreach(i,
   155  `FAT_INIT(MPN(i`'_init),CPUVEC_offset)
   156  define(`CPUVEC_offset',eval(CPUVEC_offset + 1))',
   157  CPUVEC_FUNCS_LIST)
   158  
   159  L(fat_init):
   160  	C al	index of the wanted pointer in __gmpn_cpuvec (scaled by 8 below)
        	C
        	C Shared tail of every FAT_INIT stub.  Saves the integer argument
        	C registers, calls __gmpn_cpuvec_init, restores them, then jumps
        	C through the now-updated __gmpn_cpuvec slot so the original call
        	C proceeds with its arguments intact.
        	C
        	C Stack: we arrive by jmp from a called stub, so rsp is 8 mod 16.
        	C STD64 pushes 7 registers; DOS64 pushes 5 and reserves 32 bytes of
        	C shadow space.  Either way rsp is a multiple of 16 at the call.
   161  
   162  	movzbl	%al, %eax
   163  IFSTD(`	push	%rdi	')
   164  IFSTD(`	push	%rsi	')
   165  	push	%rdx
   166  	push	%rcx
   167  	push	%r8
   168  	push	%r9
   169  	push	%rax
        	C The Win64 convention makes the caller reserve 32 bytes of shadow
        	C space above the return address.  Without it the callee is free to
        	C spill into the slots holding the registers just pushed.
        IFDOS(`	sub	$32, %rsp	')
   170  	CALL(	__gmpn_cpuvec_init)
        IFDOS(`	add	$32, %rsp	')
   171  	pop	%rax
   172  	pop	%r9
   173  	pop	%r8
   174  	pop	%rcx
   175  	pop	%rdx
   176  IFSTD(`	pop	%rsi	')
   177  IFSTD(`	pop	%rdi	')
        	C rax = pointer index again; each table entry is 8 bytes wide.
   178  ifdef(`PRETEND_PIC',`
   179  	LEA(	GSYM_PREFIX`'__gmpn_cpuvec, %r10)
   180  	jmp	*(%r10,%rax,8)
   181  ',`dnl non-PIC
   182  	jmp	*GSYM_PREFIX`'__gmpn_cpuvec(,%rax,8)
   183  ')
   184  
   185  
   186  C long __gmpn_cpuid (char dst[12], int id);
   187  C
   188  C This is called only 3 times, so just something simple and compact is fine.
   189  C
   190  C The rcx/ecx zeroing here is needed for the BMI2 check.
        C
        C Executes cpuid with eax = id and ecx = 0, then stores ebx, edx and ecx
        C at dst+0, dst+4 and dst+8.  Nothing here writes rax after cpuid, so the
        C value cpuid leaves in eax is presumably what the caller receives as the
        C return value (assuming FUNC_EXIT leaves rax alone -- confirm in its
        C definition).  rbx is preserved by parking it in r8 around the cpuid.
   191  
   192  define(`rp',  `%rdi')
   193  define(`idx', `%rsi')
   194  
   195  PROLOGUE(__gmpn_cpuid)
   196  	FUNC_ENTRY(2)
   197  	mov	%rbx, %r8		C cpuid overwrites ebx; keep rbx in r8
   198  	mov	R32(idx), R32(%rax)	C eax = id
   199  	xor	%ecx, %ecx		C ecx = 0, see the BMI2 note above
   200  	cpuid
   201  	mov	%ebx, (rp)		C dst[0..3]
   202  	mov	%edx, 4(rp)		C dst[4..7]
   203  	mov	%ecx, 8(rp)		C dst[8..11]
   204  	mov	%r8, %rbx		C restore the caller's rbx
   205  	FUNC_EXIT()
   206  	ret
   207  EPILOGUE()