github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/x86_64-defs.m4 (about)

     1  divert(-1)
     2  
     3  dnl  m4 macros for amd64 assembler.
     4  
     5  dnl  Copyright 1999-2005, 2008, 2009, 2011-2013 Free Software Foundation, Inc.
     6  
     7  dnl  This file is part of the GNU MP Library.
     8  dnl
     9  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
    10  dnl  it under the terms of either:
    11  dnl
    12  dnl    * the GNU Lesser General Public License as published by the Free
    13  dnl      Software Foundation; either version 3 of the License, or (at your
    14  dnl      option) any later version.
    15  dnl
    16  dnl  or
    17  dnl
    18  dnl    * the GNU General Public License as published by the Free Software
    19  dnl      Foundation; either version 2 of the License, or (at your option) any
    20  dnl      later version.
    21  dnl
    22  dnl  or both in parallel, as here.
    23  dnl
    24  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    25  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    26  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    27  dnl  for more details.
    28  dnl
    29  dnl  You should have received copies of the GNU General Public License and the
    30  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    31  dnl  see https://www.gnu.org/licenses/.
    32  
    33  
    34  dnl  Usage: CPUVEC_FUNCS_LIST
    35  dnl
    36  dnl  Expands to a comma-separated list of the function names from the
    37  dnl  gmp-impl.h x86 struct cpuvec_t, in the order they appear in that
    38  dnl  structure.  Each name in the list is individually quoted.
    39  define(CPUVEC_FUNCS_LIST,
    40  ``add_n',
    41  `addlsh1_n',
    42  `addlsh2_n',
    43  `addmul_1',
    44  `addmul_2',
    45  `bdiv_dbm1c',
    46  `cnd_add_n',
    47  `cnd_sub_n',
    48  `com',
    49  `copyd',
    50  `copyi',
    51  `divexact_1',
    52  `divrem_1',
    53  `gcd_1',
    54  `lshift',
    55  `lshiftc',
    56  `mod_1',
    57  `mod_1_1p',
    58  `mod_1_1p_cps',
    59  `mod_1s_2p',
    60  `mod_1s_2p_cps',
    61  `mod_1s_4p',
    62  `mod_1s_4p_cps',
    63  `mod_34lsub1',
    64  `modexact_1c_odd',
    65  `mul_1',
    66  `mul_basecase',
    67  `mullo_basecase',
    68  `preinv_divrem_1',
    69  `preinv_mod_1',
    70  `redc_1',
    71  `redc_2',
    72  `rshift',
    73  `sqr_basecase',
    74  `sub_n',
    75  `sublsh1_n',
    76  `submul_1'')
    77  
    78  
    79  dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
    80  dnl
    81  dnl  Emits GLOBL and TYPE(...,`function') for the symbol, then its label.
    82  dnl  No TEXT or ALIGN is emitted here: in the amd64 code we use explicit
    83  dnl  TEXT and ALIGN() calls, since different alignments are wanted in
    84  dnl  various circumstances.  So for instance,
    85  dnl                  TEXT
    86  dnl                  ALIGN(16)
    87  dnl          PROLOGUE(mpn_add_n)
    88  dnl                  ...
    89  dnl          EPILOGUE()
    90  
    91  define(`PROLOGUE_cpu',
    92  m4_assert_numargs(1)
    93  `	GLOBL	$1
    94  	TYPE($1,`function')
    95  $1:
    96  ')
    97  
    98  
    99  dnl  Usage: ASSERT([cond][,instructions])
   100  dnl
   101  dnl  If WANT_ASSERT is 1, output the given instructions and expect the given
   102  dnl  flags condition to then be satisfied.  For example,
   103  dnl
   104  dnl         ASSERT(ne, `cmpq %rax, %rbx')
   105  dnl
   106  dnl  The instructions can be omitted to just assert a flags condition with
   107  dnl  no extra calculation.  For example,
   108  dnl
   109  dnl         ASSERT(nc)
   110  dnl
   111  dnl  When `instructions' is not empty, a pushfq/popfq is added for
   112  dnl  convenience to preserve the flags, but the instructions themselves must
   113  dnl  preserve any registers that matter.
   114  dnl
   115  dnl  The condition can be omitted to just output the given instructions when
   116  dnl  assertion checking is wanted.  In this case the pushfq/popfq is omitted.
   117  dnl  For example,
   118  dnl
   119  dnl         ASSERT(, `movq %rax, VAR_KEEPVAL')
   120  dnl  A failed check runs into ud2; ASSERT_counter keeps the ok-labels unique.
   121  define(ASSERT,
   122  m4_assert_numargs_range(1,2)
   123  m4_assert_defined(`WANT_ASSERT')
   124  `ifelse(WANT_ASSERT,1,
   125  `ifelse(`$1',,
   126  `	$2',
   127  `ifelse(`$2',,,
   128  `	pushfq')
   129  	$2
   130  	`j$1'	L(ASSERT_ok`'ASSERT_counter)
   131  	ud2	C assertion failed
   132  L(ASSERT_ok`'ASSERT_counter):
   133  ifelse(`$2',,,`	popfq')
   134  define(`ASSERT_counter',incr(ASSERT_counter))')')')
   135  dnl  Initial value of the label counter used by ASSERT.
   136  define(ASSERT_counter,1)
   137  dnl  Usage: LEA(symbol,reg)  Load symbol's address (from the GOT when PIC).
   138  define(`LEA',`dnl
   139  ifdef(`PIC',
   140  	`mov	$1@GOTPCREL(%rip), $2'
   141  ,
   142  	`movabs	`$'$1, $2')
   143  ')
   144  dnl  Usage: DEF_OBJECT(name[,align])
   145  dnl  Switch to RODATA, emit ALIGN(align) (ALIGN(2) if omitted), then the label.
   146  define(`DEF_OBJECT',
   147  m4_assert_numargs_range(1,2)
   148  `	RODATA
   149  	ALIGN(ifelse($#,1,2,$2))
   150  $1:
   151  ')
   152  dnl  Usage: END_OBJECT(name)  Emit SIZE(name,.-name); presumably ends DEF_OBJECT.
   153  define(`END_OBJECT',
   154  m4_assert_numargs(1)
   155  `	SIZE(`$1',.-`$1')')
   156  dnl  Usage: R32(reg64), R8(reg64)  Map a 64-bit register name to the name of its
   157  dnl  low 32-bit or 8-bit part.  %rsp is not listed; no match expands to nothing.
   158  define(`R32',
   159  	`ifelse($1,`%rax',`%eax',
   160  		$1,`%rbx',`%ebx',
   161  		$1,`%rcx',`%ecx',
   162  		$1,`%rdx',`%edx',
   163  		$1,`%rsi',`%esi',
   164  		$1,`%rdi',`%edi',
   165  		$1,`%rbp',`%ebp',
   166  		$1,`%r8',`%r8d',
   167  		$1,`%r9',`%r9d',
   168  		$1,`%r10',`%r10d',
   169  		$1,`%r11',`%r11d',
   170  		$1,`%r12',`%r12d',
   171  		$1,`%r13',`%r13d',
   172  		$1,`%r14',`%r14d',
   173  		$1,`%r15',`%r15d')')
   174  define(`R8',
   175  	`ifelse($1,`%rax',`%al',
   176  		$1,`%rbx',`%bl',
   177  		$1,`%rcx',`%cl',
   178  		$1,`%rdx',`%dl',
   179  		$1,`%rsi',`%sil',
   180  		$1,`%rdi',`%dil',
   181  		$1,`%rbp',`%bpl',
   182  		$1,`%r8',`%r8b',
   183  		$1,`%r9',`%r9b',
   184  		$1,`%r10',`%r10b',
   185  		$1,`%r11',`%r11b',
   186  		$1,`%r12',`%r12b',
   187  		$1,`%r13',`%r13b',
   188  		$1,`%r14',`%r14b',
   189  		$1,`%r15',`%r15b')')
   190  
   191  
   192  dnl  Usage: CALL(funcname)
   193  dnl
   194  dnl  Call GSYM_PREFIX`'funcname, going through the PLT when PIC is defined.
   195  define(`CALL',`dnl
   196  ifdef(`PIC',
   197  	`call	GSYM_PREFIX`'$1@PLT'
   198  ,
   199  	`call	GSYM_PREFIX`'$1'
   200  )')
   201  dnl  Usage: JUMPTABSECT  Expands to a .section directive selecting
   202  dnl  .data.rel.ro.local; presumably where JMPENT tables are placed.
   203  define(`JUMPTABSECT', `.section	.data.rel.ro.local,"aw",@progbits')
   204  
   205  
   206  dnl  Usage: JMPENT(targlabel,tablabel)
   207  dnl  PIC: emit .long targlabel-tablabel; non-PIC: emit .quad targlabel.
   208  define(`JMPENT',`dnl
   209  ifdef(`PIC',
   210  	`.long	$1-$2'dnl
   211  ,
   212  	`.quad	$1'dnl
   213  )')
   214  
   215  
   216  dnl  These macros are defined just for DOS64, where they provide calling
   217  dnl  sequence glue code.
   218  dnl  Here they expand to nothing.
   219  define(`FUNC_ENTRY',`')
   220  define(`FUNC_EXIT',`')
   221  
   222  
   223  dnl  Target ABI macros.
   224  dnl  Here IFDOS drops its argument; IFSTD and IFELF pass theirs through.
   225  define(`IFDOS',   `')
   226  define(`IFSTD',   `$1')
   227  define(`IFELF',   `$1')
   228  
   229  
   230  dnl  Usage: PROTECT(symbol)
   231  dnl
   232  dnl  Used for private GMP symbols that should never be overridden by users.
   233  dnl  This can save reloc entries and improve shlib sharing as well as
   234  dnl  application startup times.
   235  dnl  Emits a .hidden directive for the symbol.
   236  define(`PROTECT',  `.hidden $1')
   237  
   238  
   239  dnl  Usage: x86_lookup(target, key,value, key,value, ...)
   240  dnl
   241  dnl  Look for `target' among the `key' parameters.
   242  dnl
   243  dnl  x86_lookup expands to the corresponding `value', or generates an error
   244  dnl  if `target' isn't found.
   245  dnl  Works by recursion, dropping one key,value pair per step.
   246  define(x86_lookup,
   247  m4_assert_numargs_range(1,999)
   248  `ifelse(eval($#<3),1,
   249  `m4_error(`unrecognised part of x86 instruction: $1
   250  ')',
   251  `ifelse(`$1',`$2', `$3',
   252  `x86_lookup(`$1',shift(shift(shift($@))))')')')
   253  
   254  
   255  dnl  Usage: x86_opcode_regxmm(reg)
   256  dnl
   257  dnl  Validate the given xmm register, and return its number, 0 to 15.
   258  
   259  define(x86_opcode_regxmm,
   260  m4_assert_numargs(1)
   261  `x86_lookup(`$1',x86_opcode_regxmm_list)')
   262  dnl  Key,value list for x86_lookup: xmm register name, register number.
   263  define(x86_opcode_regxmm_list,
   264  ``%xmm0',0,
   265  `%xmm1',1,
   266  `%xmm2',2,
   267  `%xmm3',3,
   268  `%xmm4',4,
   269  `%xmm5',5,
   270  `%xmm6',6,
   271  `%xmm7',7,
   272  `%xmm8',8,
   273  `%xmm9',9,
   274  `%xmm10',10,
   275  `%xmm11',11,
   276  `%xmm12',12,
   277  `%xmm13',13,
   278  `%xmm14',14,
   279  `%xmm15',15')
   280  
   281  dnl  Usage: palignr($imm,%srcreg,%dstreg)
   282  dnl
   283  dnl  Emit a palignr instruction, using a .byte sequence, since obsolete but
   284  dnl  still distributed versions of gas don't know SSSE3 instructions.
   285  dnl  The first character of $imm is dropped; a REX byte is added for xmm8-15.
   286  define(`palignr',
   287  m4_assert_numargs(3)
   288  `.byte	0x66,dnl
   289  ifelse(eval(x86_opcode_regxmm($3) >= 8 || x86_opcode_regxmm($2) >= 8),1,
   290         `eval(0x40+x86_opcode_regxmm($3)/8*4+x86_opcode_regxmm($2)/8),')dnl
   291  0x0f,0x3a,0x0f,dnl
   292  eval(0xc0+x86_opcode_regxmm($3)%8*8+x86_opcode_regxmm($2)%8),dnl
   293  substr($1,1)')
   294  
   295  
   296  dnl  Usage
   297  dnl
   298  dnl    regnum(op)   raw operand index (so slightly misnamed)
   299  dnl    regnumh(op)  high bit of register operand number
   300  dnl    ix(op)       0 for reg operand, 1 for plain pointer operand.
   301  dnl
   302  dnl  oplist maps registers to 0-15 and (reg) pointer operands to 16-31.
   303  define(`regnum',`x86_lookup(`$1',oplist)')
   304  define(`regnumh',`eval(regnum($1)/8 & 1)')
   305  define(`ix',`eval(regnum($1)/16)')
   306  define(`oplist',
   307  ``%rax',   0, `%rcx',   1, `%rdx',   2,  `%rbx',   3,
   308   `%rsp',   4, `%rbp',   5, `%rsi',   6,  `%rdi',   7,
   309   `%r8',    8, `%r9',    9, `%r10',  10,  `%r11',  11,
   310   `%r12',  12, `%r13',  13, `%r14',  14,  `%r15',  15,
   311   `(%rax)',16, `(%rcx)',17, `(%rdx)',18,  `(%rbx)',19,
   312   `(%rsp)',20, `(%rbp)',21, `(%rsi)',22,  `(%rdi)',23,
   313   `(%r8)', 24, `(%r9)', 25, `(%r10)',26,  `(%r11)',27,
   314   `(%r12)',28, `(%r13)',29, `(%r14)',30,  `(%r15)',31')
   315  
   316  
   317  dnl  Usage
   318  dnl
   319  dnl     mulx(reg1,reg2,reg3)
   320  dnl
   321  dnl  or
   322  dnl
   323  dnl     mulx((reg1),reg2,reg3)
   324  dnl
   325  dnl  where reg1 is any register but rsp,rbp,r12,r13, or
   326  dnl
   327  dnl     mulx(off,(reg1),reg2,reg3)
   328  dnl
   329  dnl  where reg1 is any register but rsp,r12.
   330  dnl
   331  dnl  The exceptions are due to special coding needed for some registers; rsp
   332  dnl  and r12 need an extra byte 0x24 at the end while rbp and r13 lack the
   333  dnl  offset-less form.
   334  dnl
   335  dnl  Other addressing forms are not handled.  Invalid forms are not properly
   336  dnl  detected.  Offsets that don't fit one byte are not handled correctly.
   337  dnl  Output is a single .byte directive starting with the 0xc4 VEX prefix.
   338  define(`mulx',`dnl
   339  .byte	0xc4`'dnl
   340  ifelse(`$#',3,`dnl
   341  ,eval(0xe2^32*regnumh($1)^128*regnumh($3))`'dnl
   342  ,eval(0xfb-8*regnum($2))`'dnl
   343  ,0xf6`'dnl
   344  ,eval(0xc0+(7 & regnum($1))+8*(7 & regnum($3))-0xc0*ix($1))`'dnl
   345  ',`$#',4,`dnl
   346  ,eval(0xe2^32*regnumh($2)^128*regnumh($4))`'dnl
   347  ,eval(0xfb-8*regnum($3))`'dnl
   348  ,0xf6`'dnl
   349  ,eval(0x40+(7 & regnum($2))+8*(7 & regnum($4)))`'dnl
   350  ,eval(($1 + 256) % 256)`'dnl
   351  ')')
   352  
   353  dnl  Usage
   354  dnl
   355  dnl     adcx(reg1,reg2)
   356  dnl     adox(reg1,reg2)
   357  dnl
   358  dnl  or
   359  dnl
   360  dnl     adcx((reg1),reg2)
   361  dnl     adox((reg1),reg2)
   362  dnl
   363  dnl  where reg1 is any register but rsp,rbp,r12,r13, or
   364  dnl
   365  dnl     adcx(off,(reg1),reg2)
   366  dnl     adox(off,(reg1),reg2)
   367  dnl
   368  dnl  where reg1 is any register but rsp,r12.
   369  dnl
   370  dnl  The exceptions are due to special coding needed for some registers; rsp
   371  dnl  and r12 need an extra byte 0x24 at the end while rbp and r13 lack the
   372  dnl  offset-less form.
   373  dnl
   374  dnl  Other addressing forms are not handled.  Invalid forms are not properly
   375  dnl  detected.  Offsets that don't fit one byte are not handled correctly.
   376  dnl  adx_helper: REX byte (0x48 plus high register bits), then 0x0f,0x38,0xf6.
   377  define(`adx_helper',`dnl
   378  ,eval(0x48+regnumh($1)+4*regnumh($2))`'dnl
   379  ,0x0f`'dnl
   380  ,0x38`'dnl
   381  ,0xf6`'dnl
   382  ')
   383  dnl  adx: shared encoder; appends the ModRM byte, and the offset byte if given.
   384  define(`adx',`dnl
   385  ifelse(`$#',2,`dnl
   386  adx_helper($1,$2)dnl
   387  ,eval(0xc0+(7 & regnum($1))+8*(7 & regnum($2))-0xc0*ix($1))`'dnl
   388  ',`$#',3,`dnl
   389  adx_helper($2,$3)dnl
   390  ,eval(0x40+(7 & regnum($2))+8*(7 & regnum($3)))`'dnl
   391  ,eval(($1 + 256) % 256)`'dnl
   392  ')')
   393  dnl  adcx: 0x66 prefix byte followed by the shared adx encoding.
   394  define(`adcx',`dnl
   395  .byte	0x66`'dnl
   396  adx($@)')
   397  dnl  adox: 0xf3 prefix byte followed by the shared adx encoding.
   398  define(`adox',`dnl
   399  .byte	0xf3`'dnl
   400  adx($@)')
   401  
   402  divert`'dnl