github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/x86-defs.m4 (about)

     1  divert(-1)
     2  
     3  dnl  m4 macros for x86 assembler.
     4  
     5  dnl  Copyright 1999-2003, 2007, 2010, 2012, 2014 Free Software Foundation, Inc.
     6  
     7  dnl  This file is part of the GNU MP Library.
     8  dnl
     9  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
    10  dnl  it under the terms of either:
    11  dnl
    12  dnl    * the GNU Lesser General Public License as published by the Free
    13  dnl      Software Foundation; either version 3 of the License, or (at your
    14  dnl      option) any later version.
    15  dnl
    16  dnl  or
    17  dnl
    18  dnl    * the GNU General Public License as published by the Free Software
    19  dnl      Foundation; either version 2 of the License, or (at your option) any
    20  dnl      later version.
    21  dnl
    22  dnl  or both in parallel, as here.
    23  dnl
    24  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    25  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    26  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    27  dnl  for more details.
    28  dnl
    29  dnl  You should have received copies of the GNU General Public License and the
    30  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    31  dnl  see https://www.gnu.org/licenses/.
    32  
    33  
    34  dnl  Notes:
    35  dnl
    36  dnl  m4 isn't perfect for processing BSD style x86 assembler code, the main
    37  dnl  problems are,
    38  dnl
    39  dnl  1. Doing define(foo,123) and then using foo in an addressing mode like
    40  dnl     foo(%ebx) expands as a macro rather than a constant.  This is worked
    41  dnl     around by using deflit() from asm-defs.m4, instead of define().
    42  dnl
    43  dnl  2. Immediates in macro definitions need a space or `' to stop the $
    44  dnl     looking like a macro parameter.  For example,
    45  dnl
    46  dnl	        define(foo, `mov $ 123, %eax')
    47  dnl
    48  dnl     This is only a problem in macro definitions, not in ordinary text,
    49  dnl     and not in macro parameters like text passed to forloop() or ifdef().
    50  
    51  
    52  deflit(GMP_LIMB_BYTES, 4)
    53  
    54  
    55  dnl  Libtool gives -DPIC -DDLL_EXPORT to indicate a cygwin or mingw DLL.  We
    56  dnl  undefine PIC since we don't need to be position independent in this
    57  dnl  case and definitely don't want the ELF style _GLOBAL_OFFSET_TABLE_ etc.
    58  
    59  ifdef(`DLL_EXPORT',`undefine(`PIC')')
    60  
    61  
    62  dnl  Usage: CPUVEC_FUNCS_LIST
    63  dnl
    64  dnl  A list of the functions from gmp-impl.h x86 struct cpuvec_t, in the
    65  dnl  order they appear in that structure.
    66  
    67  define(CPUVEC_FUNCS_LIST,
    68  ``add_n',
    69  `addlsh1_n',
    70  `addlsh2_n',
    71  `addmul_1',
    72  `addmul_2',
    73  `bdiv_dbm1c',
    74  `cnd_add_n',
    75  `cnd_sub_n',
    76  `com',
    77  `copyd',
    78  `copyi',
    79  `divexact_1',
    80  `divrem_1',
    81  `gcd_1',
    82  `lshift',
    83  `lshiftc',
    84  `mod_1',
    85  `mod_1_1p',
    86  `mod_1_1p_cps',
    87  `mod_1s_2p',
    88  `mod_1s_2p_cps',
    89  `mod_1s_4p',
    90  `mod_1s_4p_cps',
    91  `mod_34lsub1',
    92  `modexact_1c_odd',
    93  `mul_1',
    94  `mul_basecase',
    95  `mullo_basecase',
    96  `preinv_divrem_1',
    97  `preinv_mod_1',
    98  `redc_1',
    99  `redc_2',
   100  `rshift',
   101  `sqr_basecase',
   102  `sub_n',
   103  `sublsh1_n',
   104  `submul_1'')
   105  
   106  
   107  dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
   108  dnl
   109  dnl  In the x86 code we use explicit TEXT and ALIGN() calls in the code,
   110  dnl  since different alignments are wanted in various circumstances.  So for
   111  dnl  instance,
   112  dnl
   113  dnl                  TEXT
   114  dnl                  ALIGN(16)
   115  dnl          PROLOGUE(mpn_add_n)
   116  dnl          ...
   117  dnl          EPILOGUE()
   118  
   119  define(`PROLOGUE_cpu',
   120  m4_assert_numargs(1)
   121  m4_assert_defined(`WANT_PROFILING')
   122  	`GLOBL	$1
   123  	TYPE($1,`function')
   124  	COFF_TYPE($1)
   125  $1:
   126  ifelse(WANT_PROFILING,`prof',      `	call_mcount')
   127  ifelse(WANT_PROFILING,`gprof',     `	call_mcount')
   128  ifelse(WANT_PROFILING,`instrument',`	call_instrument(enter)')
   129  ')
   130  
   131  
   132  dnl  Usage: COFF_TYPE(GSYM_PREFIX`'foo)
   133  dnl
   134  dnl  Emit COFF style ".def ... .endef" type information for a function, when
   135  dnl  supported.  The argument should include any GSYM_PREFIX.
   136  dnl
   137  dnl  See autoconf macro GMP_ASM_COFF_TYPE for HAVE_COFF_TYPE.
   138  
   139  define(COFF_TYPE,
   140  m4_assert_numargs(1)
   141  m4_assert_defined(`HAVE_COFF_TYPE')
   142  `ifelse(HAVE_COFF_TYPE,yes,
   143  	`.def	$1
   144  	.scl	2
   145  	.type	32
   146  	.endef')')
   147  
   148  
   149  dnl  Usage: call_mcount
   150  dnl
   151  dnl  For `gprof' style profiling, %ebp is set up as a frame pointer.  None of
   152  dnl  the assembler routines use %ebp this way, so it's done only for the
   153  dnl  benefit of mcount.  glibc sysdeps/i386/i386-mcount.S shows how mcount
   154  dnl  gets the current function from (%esp) and the parent from 4(%ebp).
   155  dnl
   156  dnl  For `prof' style profiling gcc generates mcount calls without setting
   157  dnl  up %ebp, and the same is done here.
   158  
   159  define(`call_mcount',
   160  m4_assert_numargs(-1)
   161  m4_assert_defined(`WANT_PROFILING')
   162  m4_assert_defined(`MCOUNT_PIC_REG')
   163  m4_assert_defined(`MCOUNT_NONPIC_REG')
   164  m4_assert_defined(`MCOUNT_PIC_CALL')
   165  m4_assert_defined(`MCOUNT_NONPIC_CALL')
   166  `ifelse(ifdef(`PIC',`MCOUNT_PIC_REG',`MCOUNT_NONPIC_REG'),,,
   167  `	DATA
   168  	ALIGN(4)
   169  L(mcount_data_`'mcount_counter):
   170  	W32	0
   171  	TEXT
   172  ')dnl
   173  ifelse(WANT_PROFILING,`gprof',
   174  `	pushl	%ebp
   175  	movl	%esp, %ebp
   176  ')dnl
   177  ifdef(`PIC',
   178  `	pushl	%ebx
   179  	call_movl_eip_to_ebx
   180  L(mcount_here_`'mcount_counter):
   181  	addl	$_GLOBAL_OFFSET_TABLE_+[.-L(mcount_here_`'mcount_counter)], %ebx
   182  ifelse(MCOUNT_PIC_REG,,,
   183  `	leal	L(mcount_data_`'mcount_counter)@GOTOFF(%ebx), MCOUNT_PIC_REG')
   184  MCOUNT_PIC_CALL
   185  	popl	%ebx
   186  ',`dnl non-PIC
   187  ifelse(MCOUNT_NONPIC_REG,,,
   188  `	movl	`$'L(mcount_data_`'mcount_counter), MCOUNT_NONPIC_REG
   189  ')dnl
   190  MCOUNT_NONPIC_CALL
   191  ')dnl
   192  ifelse(WANT_PROFILING,`gprof',
   193  `	popl	%ebp
   194  ')
   195  define(`mcount_counter',incr(mcount_counter))
   196  ')
   197  dnl  Counter giving each call_mcount expansion its own mcount_data_N and mcount_here_N labels.
   198  define(mcount_counter,1)
   199  
   200  
   201  dnl  Usage: call_instrument(enter|exit)
   202  dnl
   203  dnl  Call __cyg_profile_func_enter or __cyg_profile_func_exit.
   204  dnl
   205  dnl  For PIC, most routines don't require _GLOBAL_OFFSET_TABLE_ themselves
   206  dnl  so %ebx is just set up for these calls.  It's a bit wasteful to repeat
   207  dnl  the setup for the exit call having done it earlier for the enter, but
   208  dnl  there's nowhere very convenient to hold %ebx through the length of a
   209  dnl  routine, in general.
   210  dnl
   211  dnl  For PIC, because instrument_current_function will be within the current
   212  dnl  object file we can get it just as an offset from %eip, there's no need
   213  dnl  to use the GOT.
   214  dnl
   215  dnl  No attempt is made to maintain the stack alignment gcc generates with
   216  dnl  -mpreferred-stack-boundary.  This wouldn't be hard, but it seems highly
   217  dnl  unlikely the instrumenting functions would be doing anything that'd
   218  dnl  benefit from alignment, in particular they're unlikely to be using
   219  dnl  doubles or long doubles on the stack.
   220  dnl
   221  dnl  The FRAME scheme is used to conveniently account for the register saves
   222  dnl  before accessing the return address.  Any previous value is saved and
   223  dnl  restored, since plenty of code keeps a value across a "ret" in the
   224  dnl  middle of a routine.
   225  
   226  define(call_instrument,
   227  m4_assert_numargs(1)
   228  `	pushdef(`FRAME',0)
   229  ifelse($1,exit,
   230  `	pushl	%eax	FRAME_pushl()	C return value
   231  ')
   232  ifdef(`PIC',
   233  `	pushl	%ebx	FRAME_pushl()
   234  	call_movl_eip_to_ebx
   235  L(instrument_here_`'instrument_count):
   236  	movl	%ebx, %ecx
   237  	addl	$_GLOBAL_OFFSET_TABLE_+[.-L(instrument_here_`'instrument_count)], %ebx
   238  	C use addl rather than leal to avoid old gas bugs, see mpn/x86/README
   239  	addl	$instrument_current_function-L(instrument_here_`'instrument_count), %ecx
   240  	pushl	m4_empty_if_zero(FRAME)(%esp)	FRAME_pushl()	C return addr
   241  	pushl	%ecx				FRAME_pushl()	C this function
   242  	call	GSYM_PREFIX`'__cyg_profile_func_$1@PLT
   243  	addl	$`'8, %esp
   244  	popl	%ebx
   245  ',
   246  `	C non-PIC
   247  	pushl	m4_empty_if_zero(FRAME)(%esp)	FRAME_pushl()	C return addr
   248  	pushl	$instrument_current_function	FRAME_pushl()	C this function
   249  	call	GSYM_PREFIX`'__cyg_profile_func_$1
   250  	addl	$`'8, %esp
   251  ')
   252  ifelse($1,exit,
   253  `	popl	%eax			C return value
   254  ')
   255  	popdef(`FRAME')
   256  define(`instrument_count',incr(instrument_count))
   257  ')
   258  define(instrument_count,1)
   259  
   260  
   261  dnl  Usage: instrument_current_function
   262  dnl
   263  dnl  Return the current function name for instrumenting purposes.  This is
   264  dnl  PROLOGUE_current_function, but it sticks at the first such name seen.
   265  dnl
   266  dnl  Sticking to the first name seen ensures that multiple-entrypoint
   267  dnl  functions like mpn_add_nc and mpn_add_n will make enter and exit calls
   268  dnl  giving the same function address.
   269  
   270  define(instrument_current_function,
   271  m4_assert_numargs(-1)
   272  `ifdef(`instrument_current_function_seen',
   273  `instrument_current_function_seen',
   274  `define(`instrument_current_function_seen',PROLOGUE_current_function)dnl
   275  PROLOGUE_current_function')')
   276  
   277  
   278  dnl  Usage: call_movl_eip_to_ebx
   279  dnl
   280  dnl  Generate a call to L(movl_eip_to_ebx), and record the need for that
   281  dnl  routine.
   282  
   283  define(call_movl_eip_to_ebx,
   284  m4_assert_numargs(-1)
   285  `call	L(movl_eip_to_ebx)
   286  define(`movl_eip_to_ebx_needed',1)')
   287  
   288  dnl  Usage: generate_movl_eip_to_ebx
   289  dnl
   290  dnl  Emit a L(movl_eip_to_ebx) routine, if needed and not already generated.
   291  
   292  define(generate_movl_eip_to_ebx,
   293  m4_assert_numargs(-1)
   294  `ifelse(movl_eip_to_ebx_needed,1,
   295  `ifelse(movl_eip_to_ebx_done,1,,
   296  `L(movl_eip_to_ebx):
   297  	movl	(%esp), %ebx
   298  	ret_internal
   299  define(`movl_eip_to_ebx_done',1)
   300  ')')')
   301  
   302  
   303  dnl  Usage: ret
   304  dnl
   305  dnl  Generate a "ret", but if doing instrumented profiling then call
   306  dnl  __cyg_profile_func_exit first.
   307  
   308  define(ret,
   309  m4_assert_numargs(-1)
   310  m4_assert_defined(`WANT_PROFILING')
   311  `ifelse(WANT_PROFILING,instrument,
   312  `ret_instrument',
   313  `ret_internal')
   314  generate_movl_eip_to_ebx
   315  ')
   316  
   317  
   318  dnl  Usage: ret_internal
   319  dnl
   320  dnl  A plain "ret", without any __cyg_profile_func_exit call.  This can be
   321  dnl  used for a return which is internal to some function, such as when
   322  dnl  getting %eip for PIC.
   323  
   324  define(ret_internal,
   325  m4_assert_numargs(-1)
   326  ``ret'')
   327  
   328  
   329  dnl  Usage: ret_instrument
   330  dnl
   331  dnl  Generate call to __cyg_profile_func_exit and then a ret.  If a ret has
   332  dnl  already been seen from this function then jump to that chunk of code,
   333  dnl  rather than emitting it again.
   334  
   335  define(ret_instrument,
   336  m4_assert_numargs(-1)
   337  `ifelse(m4_unquote(ret_instrument_seen_`'instrument_current_function),1,
   338  `jmp	L(instrument_exit_`'instrument_current_function)',
   339  `define(ret_instrument_seen_`'instrument_current_function,1)
   340  L(instrument_exit_`'instrument_current_function):
   341  call_instrument(exit)
   342  	ret_internal')')
   343  
   344  
   345  dnl  Usage: _GLOBAL_OFFSET_TABLE_
   346  dnl
   347  dnl  Expand to _GLOBAL_OFFSET_TABLE_ plus any necessary underscore prefix.
   348  dnl  This lets us write plain _GLOBAL_OFFSET_TABLE_ in SVR4 style, but still
   349  dnl  work with systems requiring an extra underscore such as OpenBSD.
   350  dnl
   351  dnl  deflit is used so "leal _GLOBAL_OFFSET_TABLE_(%eax), %ebx" will come
   352  dnl  out right, though that form doesn't work properly in gas (see
   353  dnl  mpn/x86/README).
   354  
   355  deflit(_GLOBAL_OFFSET_TABLE_,
   356  m4_assert_defined(`GOT_GSYM_PREFIX')
   357  `GOT_GSYM_PREFIX`_GLOBAL_OFFSET_TABLE_'')
   358  
   359  
   360  dnl  --------------------------------------------------------------------------
   361  dnl  Various x86 macros.
   362  dnl
   363  
   364  
   365  dnl  Usage: ALIGN_OFFSET(bytes,offset)
   366  dnl
   367  dnl  Align to `offset' away from a multiple of `bytes'.
   368  dnl
   369  dnl  This is useful for testing, for example align to something very strict
   370  dnl  and see what effect offsets from it have, "ALIGN_OFFSET(256,32)".
   371  dnl
   372  dnl  Generally you wouldn't execute across the padding, but it's done with
   373  dnl  nop's so it'll work.
   374  
   375  define(ALIGN_OFFSET,
   376  m4_assert_numargs(2)
   377  `ALIGN($1)
   378  forloop(`i',1,$2,`	nop
   379  ')')
   380  
   381  
   382  dnl  Usage: defframe(name,offset)
   383  dnl
   384  dnl  Make a definition like the following with which to access a parameter
   385  dnl  or variable on the stack.
   386  dnl
   387  dnl         define(name,`FRAME+offset(%esp)')
   388  dnl
   389  dnl  Actually m4_empty_if_zero(FRAME+offset) is used, which will save one
   390  dnl  byte if FRAME+offset is zero, by putting (%esp) rather than 0(%esp).
   391  dnl  Use define(`defframe_empty_if_zero_disabled',1) if for some reason the
   392  dnl  zero offset is wanted.
   393  dnl
   394  dnl  The new macro also gets a check that when it's used FRAME is actually
   395  dnl  defined, and that the final %esp offset isn't negative, which would
   396  dnl  mean an attempt to access something below the current %esp.
   397  dnl
   398  dnl  deflit() is used rather than a plain define(), so the new macro won't
   399  dnl  delete any following parenthesized expression.  name(%edi) will come
   400  dnl  out say as 16(%esp)(%edi).  This isn't valid assembler and should
   401  dnl  provoke an error, which is better than silently giving just 16(%esp).
   402  dnl
   403  dnl  See README for more on the suggested way to access the stack frame.
   404  
   405  define(defframe,
   406  m4_assert_numargs(2)
   407  `deflit(`$1',
   408  m4_assert_defined(`FRAME')
   409  `defframe_check_notbelow(`$1',$2,FRAME)dnl
   410  defframe_empty_if_zero(FRAME+($2))(%esp)')')
   411  
   412  dnl  Called: defframe_empty_if_zero(expression) -- eval(expression) if defframe_empty_if_zero_disabled is 1, else m4_empty_if_zero(expression)
   413  define(defframe_empty_if_zero,
   414  m4_assert_numargs(1)
   415  `ifelse(defframe_empty_if_zero_disabled,1,
   416  `eval($1)',
   417  `m4_empty_if_zero($1)')')
   418  
   419  dnl  Called: defframe_check_notbelow(`name',offset,FRAME) -- m4_error if FRAME+offset is negative
   420  define(defframe_check_notbelow,
   421  m4_assert_numargs(3)
   422  `ifelse(eval(($3)+($2)<0),1,
   423  `m4_error(`$1 at frame offset $2 used when FRAME is only $3 bytes
   424  ')')')
   425  
   426  
   427  dnl  Usage: FRAME_pushl()
   428  dnl         FRAME_popl()
   429  dnl         FRAME_addl_esp(n)
   430  dnl         FRAME_subl_esp(n)
   431  dnl
   432  dnl  Adjust FRAME appropriately for a pushl or popl, or for an addl or subl
   433  dnl  %esp of n bytes.
   434  dnl
   435  dnl  Using these macros is completely optional.  Sometimes it makes more
   436  dnl  sense to put explicit deflit(`FRAME',N) forms, especially when there's
   437  dnl  jumps and different sequences of FRAME values need to be used in
   438  dnl  different places.
   439  
   440  define(FRAME_pushl,
   441  m4_assert_numargs(0)
   442  m4_assert_defined(`FRAME')
   443  `deflit(`FRAME',eval(FRAME+4))')
   444  
   445  define(FRAME_popl,
   446  m4_assert_numargs(0)
   447  m4_assert_defined(`FRAME')
   448  `deflit(`FRAME',eval(FRAME-4))')
   449  
   450  define(FRAME_addl_esp,
   451  m4_assert_numargs(1)
   452  m4_assert_defined(`FRAME')
   453  `deflit(`FRAME',eval(FRAME-($1)))')
   454  
   455  define(FRAME_subl_esp,
   456  m4_assert_numargs(1)
   457  m4_assert_defined(`FRAME')
   458  `deflit(`FRAME',eval(FRAME+($1)))')
   459  
   460  
   461  dnl  Usage: defframe_pushl(name)
   462  dnl
   463  dnl  Do a combination FRAME_pushl() and a defframe() to name the stack
   464  dnl  location just pushed.  This should come after a pushl instruction.
   465  dnl  Putting it on the same line works and avoids lengthening the code.  For
   466  dnl  example,
   467  dnl
   468  dnl         pushl   %eax     defframe_pushl(VAR_COUNTER)
   469  dnl
   470  dnl  Notice the defframe() is done with an unquoted -FRAME thus giving its
   471  dnl  current value without tracking future changes.
   472  
   473  define(defframe_pushl,
   474  m4_assert_numargs(1)
   475  `FRAME_pushl()defframe(`$1',-FRAME)')
   476  
   477  
   478  dnl  --------------------------------------------------------------------------
   479  dnl  Assembler instruction macros.
   480  dnl
   481  
   482  
   483  dnl  Usage: emms_or_femms
   484  dnl         femms_available_p
   485  dnl
   486  dnl  femms_available_p expands to 1 or 0 according to whether the AMD 3DNow
   487  dnl  femms instruction is available.  emms_or_femms expands to femms if
   488  dnl  available, or emms if not.
   489  dnl
   490  dnl  emms_or_femms is meant for use in the K6 directory where plain K6
   491  dnl  (without femms) and K6-2 and K6-3 (with a slightly faster femms) are
   492  dnl  supported together.
   493  dnl
   494  dnl  On K7 femms is no longer faster and is just an alias for emms, so plain
   495  dnl  emms may as well be used.
   496  
   497  define(femms_available_p,
   498  m4_assert_numargs(-1)
   499  `m4_ifdef_anyof_p(
   500  	`HAVE_HOST_CPU_k62',
   501  	`HAVE_HOST_CPU_k63',
   502  	`HAVE_HOST_CPU_athlon')')
   503  
   504  define(emms_or_femms,
   505  m4_assert_numargs(-1)
   506  `ifelse(femms_available_p,1,`femms',`emms')')
   507  
   508  
   509  dnl  Usage: femms
   510  dnl
   511  dnl  Gas 2.9.1 which comes with FreeBSD 3.4 doesn't support femms, so the
   512  dnl  following is a replacement using .byte.
   513  
   514  define(femms,
   515  m4_assert_numargs(-1)
   516  `.byte	15,14	C AMD 3DNow femms')
   517  
   518  
   519  dnl  Usage: jadcl0(op)
   520  dnl
   521  dnl  Generate a jnc/incl as a substitute for adcl $0,op.  Note this isn't an
   522  dnl  exact replacement, since it doesn't set the flags like adcl does.
   523  dnl
   524  dnl  This finds a use in K6 mpn_addmul_1, mpn_submul_1, mpn_mul_basecase and
   525  dnl  mpn_sqr_basecase because on K6 an adcl is slow, the branch
   526  dnl  misprediction penalty is small, and the multiply algorithm used leads
   527  dnl  to a carry bit on average only 1/4 of the time.
   528  dnl
   529  dnl  jadcl0_disabled can be set to 1 to instead generate an ordinary adcl
   530  dnl  for comparison.  For example,
   531  dnl
   532  dnl		define(`jadcl0_disabled',1)
   533  dnl
   534  dnl  When using a register operand, eg. "jadcl0(%edx)", the jnc/incl code is
   535  dnl  the same size as an adcl.  This makes it possible to use the exact same
   536  dnl  computed jump code when testing the relative speed of the two.
   537  
   538  define(jadcl0,
   539  m4_assert_numargs(1)
   540  `ifelse(jadcl0_disabled,1,
   541  	`adcl	$`'0, $1',
   542  	`jnc	L(jadcl0_`'jadcl0_counter)
   543  	incl	$1
   544  L(jadcl0_`'jadcl0_counter):
   545  define(`jadcl0_counter',incr(jadcl0_counter))')')
   546  dnl  Counter giving each jadcl0 expansion its own L(jadcl0_N) label.
   547  define(jadcl0_counter,1)
   548  
   549  
   550  dnl  Usage: x86_lookup(target, key,value, key,value, ...)
   551  dnl         x86_lookup_p(target, key,value, key,value, ...)
   552  dnl
   553  dnl  Look for `target' among the `key' parameters.
   554  dnl
   555  dnl  x86_lookup expands to the corresponding `value', or generates an error
   556  dnl  if `target' isn't found.
   557  dnl
   558  dnl  x86_lookup_p expands to 1 if `target' is found, or 0 if not.
   559  
   560  define(x86_lookup,
   561  m4_assert_numargs_range(1,999)
   562  `ifelse(eval($#<3),1,
   563  `m4_error(`unrecognised part of x86 instruction: $1
   564  ')',
   565  `ifelse(`$1',`$2', `$3',
   566  `x86_lookup(`$1',shift(shift(shift($@))))')')')
   567  
   568  define(x86_lookup_p,
   569  m4_assert_numargs_range(1,999)
   570  `ifelse(eval($#<3),1, `0',
   571  `ifelse(`$1',`$2',    `1',
   572  `x86_lookup_p(`$1',shift(shift(shift($@))))')')')
   573  
   574  
   575  dnl  Usage: x86_opcode_reg32(reg)
   576  dnl         x86_opcode_reg32_p(reg)
   577  dnl
   578  dnl  x86_opcode_reg32 expands to the standard 3 bit encoding for the given
   579  dnl  32-bit register, eg. `%ebp' turns into 5.
   580  dnl
   581  dnl  x86_opcode_reg32_p expands to 1 if reg is a valid 32-bit register, or 0
   582  dnl  if not.
   583  
   584  define(x86_opcode_reg32,
   585  m4_assert_numargs(1)
   586  `x86_lookup(`$1',x86_opcode_reg32_list)')
   587  
   588  define(x86_opcode_reg32_p,
   589  m4_assert_onearg()
   590  `x86_lookup_p(`$1',x86_opcode_reg32_list)')
   591  
   592  define(x86_opcode_reg32_list,
   593  ``%eax',0,
   594  `%ecx',1,
   595  `%edx',2,
   596  `%ebx',3,
   597  `%esp',4,
   598  `%ebp',5,
   599  `%esi',6,
   600  `%edi',7')
   601  
   602  
   603  dnl  Usage: x86_opcode_tttn(cond)
   604  dnl
   605  dnl  Expand to the 4-bit "tttn" field value for the given x86 branch
   606  dnl  condition (like `c', `ae', etc), as listed in x86_opcode_tttn_list.
   607  
   608  define(x86_opcode_tttn,
   609  m4_assert_numargs(1)
   610  `x86_lookup(`$1',x86_opcode_tttn_list)')
   611  dnl  Condition name,tttn value pairs, in the key,value form taken by x86_lookup; also passed to define_cmov_many.
   612  define(x86_opcode_tttn_list,
   613  ``o',  0,
   614  `no',  1,
   615  `b',   2, `c',  2, `nae',2,
   616  `nb',  3, `nc', 3, `ae', 3,
   617  `e',   4, `z',  4,
   618  `ne',  5, `nz', 5,
   619  `be',  6, `na', 6,
   620  `nbe', 7, `a',  7,
   621  `s',   8,
   622  `ns',  9,
   623  `p',  10, `pe', 10, `npo',10,
   624  `np', 11, `npe',11, `po', 11,
   625  `l',  12, `nge',12,
   626  `nl', 13, `ge', 13,
   627  `le', 14, `ng', 14,
   628  `nle',15, `g',  15')
   629  
   630  
   631  dnl  Usage: cmovCC(%srcreg,%dstreg)
   632  dnl
   633  dnl  Emit a cmov instruction, using a .byte sequence, since various past
   634  dnl  versions of gas don't know cmov.  For example,
   635  dnl
   636  dnl         cmovz(  %eax, %ebx)
   637  dnl
   638  dnl  The source operand can only be a plain register.  (m4 code implementing
   639  dnl  full memory addressing modes exists, believe it or not, but isn't
   640  dnl  currently needed and isn't included.)
   641  dnl
   642  dnl  All the standard conditions are defined.  Attempting to use one without
   643  dnl  the macro parentheses, such as just "cmovbe %eax, %ebx", will provoke
   644  dnl  an error.  This protects against writing something old gas wouldn't
   645  dnl  understand.
   646  
   647  dnl  Called: define_cmov_many(cond,tttn,cond,tttn,...)
   648  define(define_cmov_many,
   649  `ifelse(m4_length(`$1'),0,,
   650  `define_cmov(`$1',`$2')define_cmov_many(shift(shift($@)))')')
   651  
   652  dnl  Called: define_cmov(cond,tttn)
   653  dnl  Emit basically define(cmov<cond>,`cmov_internal(<cond>,<tttn>,`$1',`$2')')
   654  define(define_cmov,
   655  m4_assert_numargs(2)
   656  `define(`cmov$1',
   657  m4_instruction_wrapper()
   658  m4_assert_numargs(2)
   659  `cmov_internal'(m4_doublequote($`'0),``$2'',dnl
   660  m4_doublequote($`'1),m4_doublequote($`'2)))')
   661  
   662  define_cmov_many(x86_opcode_tttn_list)
   663  
   664  dnl  Called: cmov_internal(name,tttn,src,dst) -- emit .byte 15, 64+tttn, 192+8*dst+src, then "name src, dst" after a C marker
   665  define(cmov_internal,
   666  m4_assert_numargs(4)
   667  `.byte	dnl
   668  15, dnl
   669  eval(64+$2), dnl
   670  eval(192+8*x86_opcode_reg32(`$4')+x86_opcode_reg32(`$3')) dnl
   671  	C `$1 $3, $4'')
   672  
   673  
   674  dnl  Usage: x86_opcode_regmmx(reg)
   675  dnl
   676  dnl  Validate the given mmx register, and return its number, 0 to 7.
   677  
   678  define(x86_opcode_regmmx,
   679  m4_assert_numargs(1)
   680  `x86_lookup(`$1',x86_opcode_regmmx_list)')
   681  
   682  define(x86_opcode_regmmx_list,
   683  ``%mm0',0,
   684  `%mm1',1,
   685  `%mm2',2,
   686  `%mm3',3,
   687  `%mm4',4,
   688  `%mm5',5,
   689  `%mm6',6,
   690  `%mm7',7')
   691  
   692  
   693  dnl  Usage: psadbw(%srcreg,%dstreg)
   694  dnl
   695  dnl  Oldish versions of gas don't know psadbw, in particular gas 2.9.1 on
   696  dnl  FreeBSD 3.3 and 3.4 doesn't, so instead emit .byte sequences.  For
   697  dnl  example,
   698  dnl
   699  dnl         psadbw( %mm1, %mm2)
   700  dnl
   701  dnl  Only register->register forms are supported here, which suffices for
   702  dnl  the current code.
   703  
   704  define(psadbw,
   705  m4_instruction_wrapper()
   706  m4_assert_numargs(2)
   707  `.byte 0x0f,0xf6,dnl
   708  eval(192+x86_opcode_regmmx(`$2')*8+x86_opcode_regmmx(`$1')) dnl
   709  	C `psadbw $1, $2'')
   710  
   711  
   712  dnl  Usage: Zdisp(inst,op,op,op)
   713  dnl
   714  dnl  Generate explicit .byte sequences if necessary to force a byte-sized
   715  dnl  zero displacement on an instruction.  For example,
   716  dnl
   717  dnl         Zdisp(  movl,   0,(%esi), %eax)
   718  dnl
   719  dnl  expands to
   720  dnl
   721  dnl                 .byte   139,70,0  C movl 0(%esi), %eax
   722  dnl
   723  dnl  If the displacement given isn't 0, then normal assembler code is
   724  dnl  generated.  For example,
   725  dnl
   726  dnl         Zdisp(  movl,   4,(%esi), %eax)
   727  dnl
   728  dnl  expands to
   729  dnl
   730  dnl                 movl    4(%esi), %eax
   731  dnl
   732  dnl  This means a single Zdisp() form can be used with an expression for the
   733  dnl  displacement, and .byte will be used only if necessary.  The
   734  dnl  displacement argument is eval()ed.
   735  dnl
   736  dnl  Because there aren't many places a 0(reg) form is wanted, Zdisp is
   737  dnl  implemented with a table of instructions and encodings.  A new entry is
   738  dnl  needed for any different operation or registers.  The table is split
   739  dnl  into separate macros to avoid overflowing BSD m4 macro expansion space.
   740  
   741  define(Zdisp,
   742  m4_assert_numargs(4)
   743  `define(`Zdisp_found',0)dnl
   744  Zdisp_1($@)dnl
   745  Zdisp_2($@)dnl
   746  Zdisp_3($@)dnl
   747  Zdisp_4($@)dnl
   748  ifelse(Zdisp_found,0,
   749  `m4_error(`unrecognised instruction in Zdisp: $1 $2 $3 $4
   750  ')')')
   751  
   752  define(Zdisp_1,`dnl
   753  Zdisp_match( adcl, 0,(%edx), %eax,        `0x13,0x42,0x00',           $@)`'dnl
   754  Zdisp_match( adcl, 0,(%edx), %ebx,        `0x13,0x5a,0x00',           $@)`'dnl
   755  Zdisp_match( adcl, 0,(%edx), %esi,        `0x13,0x72,0x00',           $@)`'dnl
   756  Zdisp_match( addl, %ebx, 0,(%edi),        `0x01,0x5f,0x00',           $@)`'dnl
   757  Zdisp_match( addl, %ecx, 0,(%edi),        `0x01,0x4f,0x00',           $@)`'dnl
   758  Zdisp_match( addl, %esi, 0,(%edi),        `0x01,0x77,0x00',           $@)`'dnl
   759  Zdisp_match( sbbl, 0,(%edx), %eax,        `0x1b,0x42,0x00',           $@)`'dnl
   760  Zdisp_match( sbbl, 0,(%edx), %esi,        `0x1b,0x72,0x00',           $@)`'dnl
   761  Zdisp_match( subl, %ecx, 0,(%edi),        `0x29,0x4f,0x00',           $@)`'dnl
   762  Zdisp_match( movzbl, 0,(%eax,%ebp), %eax, `0x0f,0xb6,0x44,0x28,0x00', $@)`'dnl
   763  Zdisp_match( movzbl, 0,(%ecx,%edi), %edi, `0x0f,0xb6,0x7c,0x39,0x00', $@)`'dnl
   764  Zdisp_match( adc, 0,(%ebx,%ecx,4), %eax,  `0x13,0x44,0x8b,0x00',      $@)`'dnl
   765  Zdisp_match( sbb, 0,(%ebx,%ecx,4), %eax,  `0x1b,0x44,0x8b,0x00',      $@)`'dnl
   766  ')
   767  define(Zdisp_2,`dnl
   768  Zdisp_match( movl, %eax, 0,(%edi),        `0x89,0x47,0x00',           $@)`'dnl
   769  Zdisp_match( movl, %ebx, 0,(%edi),        `0x89,0x5f,0x00',           $@)`'dnl
   770  Zdisp_match( movl, %esi, 0,(%edi),        `0x89,0x77,0x00',           $@)`'dnl
   771  Zdisp_match( movl, 0,(%ebx), %eax,        `0x8b,0x43,0x00',           $@)`'dnl
   772  Zdisp_match( movl, 0,(%ebx), %esi,        `0x8b,0x73,0x00',           $@)`'dnl
   773  Zdisp_match( movl, 0,(%edx), %eax,        `0x8b,0x42,0x00',           $@)`'dnl
   774  Zdisp_match( movl, 0,(%esi), %eax,        `0x8b,0x46,0x00',           $@)`'dnl
   775  Zdisp_match( movl, 0,(%esi,%ecx,4), %eax, `0x8b,0x44,0x8e,0x00',      $@)`'dnl
   776  Zdisp_match( mov, 0,(%esi,%ecx,4), %eax,  `0x8b,0x44,0x8e,0x00',      $@)`'dnl
   777  Zdisp_match( mov, %eax, 0,(%edi,%ecx,4),  `0x89,0x44,0x8f,0x00',      $@)`'dnl
   778  ')
   779  define(Zdisp_3,`dnl
   780  Zdisp_match( movq, 0,(%eax,%ecx,8), %mm0, `0x0f,0x6f,0x44,0xc8,0x00', $@)`'dnl
   781  Zdisp_match( movq, 0,(%ebx,%eax,4), %mm0, `0x0f,0x6f,0x44,0x83,0x00', $@)`'dnl
   782  Zdisp_match( movq, 0,(%ebx,%eax,4), %mm2, `0x0f,0x6f,0x54,0x83,0x00', $@)`'dnl
   783  Zdisp_match( movq, 0,(%ebx,%ecx,4), %mm0, `0x0f,0x6f,0x44,0x8b,0x00', $@)`'dnl
   784  Zdisp_match( movq, 0,(%edx), %mm0,        `0x0f,0x6f,0x42,0x00',      $@)`'dnl
   785  Zdisp_match( movq, 0,(%esi), %mm0,        `0x0f,0x6f,0x46,0x00',      $@)`'dnl
   786  Zdisp_match( movq, %mm0, 0,(%edi),        `0x0f,0x7f,0x47,0x00',      $@)`'dnl
   787  Zdisp_match( movq, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7f,0x54,0x81,0x00', $@)`'dnl
   788  Zdisp_match( movq, %mm2, 0,(%edx,%eax,4), `0x0f,0x7f,0x54,0x82,0x00', $@)`'dnl
   789  Zdisp_match( movq, %mm0, 0,(%edx,%ecx,8), `0x0f,0x7f,0x44,0xca,0x00', $@)`'dnl
   790  ')
   791  define(Zdisp_4,`dnl
   792  Zdisp_match( movd, 0,(%eax,%ecx,4), %mm0, `0x0f,0x6e,0x44,0x88,0x00', $@)`'dnl
   793  Zdisp_match( movd, 0,(%eax,%ecx,8), %mm1, `0x0f,0x6e,0x4c,0xc8,0x00', $@)`'dnl
   794  Zdisp_match( movd, 0,(%edx,%ecx,8), %mm0, `0x0f,0x6e,0x44,0xca,0x00', $@)`'dnl
   795  Zdisp_match( movd, %mm0, 0,(%eax,%ecx,4), `0x0f,0x7e,0x44,0x88,0x00', $@)`'dnl
   796  Zdisp_match( movd, %mm0, 0,(%ecx,%eax,4), `0x0f,0x7e,0x44,0x81,0x00', $@)`'dnl
   797  Zdisp_match( movd, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7e,0x54,0x81,0x00', $@)`'dnl
   798  Zdisp_match( movd, %mm0, 0,(%edx,%ecx,4), `0x0f,0x7e,0x44,0x8a,0x00', $@)`'dnl
   799  ')
   800  dnl  Called: Zdisp_match(tinst,top,top,top, bytes, inst,op,op,op) -- table entry, its encoding, then Zdisp's arguments; on a match sets Zdisp_found to 1 and emits .byte bytes if the displacement evals to 0, else the plain instruction
   801  define(Zdisp_match,
   802  m4_assert_numargs(9)
   803  `ifelse(eval(m4_stringequal_p(`$1',`$6')
   804  	&& m4_stringequal_p(`$2',0)
   805  	&& m4_stringequal_p(`$3',`$8')
   806  	&& m4_stringequal_p(`$4',`$9')),1,
   807  `define(`Zdisp_found',1)dnl
   808  ifelse(eval(`$7'),0,
   809  `	.byte	$5  C `$1 0$3, $4'',
   810  `	$6	$7$8, $9')',
   811  
   812  `ifelse(eval(m4_stringequal_p(`$1',`$6')
   813  	&& m4_stringequal_p(`$2',`$7')
   814  	&& m4_stringequal_p(`$3',0)
   815  	&& m4_stringequal_p(`$4',`$9')),1,
   816  `define(`Zdisp_found',1)dnl
   817  ifelse(eval(`$8'),0,
   818  `	.byte	$5  C `$1 $2, 0$4'',
   819  `	$6	$7, $8$9')')')')
   820  
   821  
   822  dnl  Usage: shldl(count,src,dst)
   823  dnl         shrdl(count,src,dst)
   824  dnl         shldw(count,src,dst)
   825  dnl         shrdw(count,src,dst)
   826  dnl
   827  dnl  Generate a double-shift instruction, possibly omitting a %cl count
   828  dnl  parameter if that's what the assembler requires, as indicated by
   829  dnl  WANT_SHLDL_CL in config.m4.  For example,
   830  dnl
   831  dnl         shldl(  %cl, %eax, %ebx)
   832  dnl
   833  dnl  turns into either
   834  dnl
   835  dnl         shldl   %cl, %eax, %ebx
   836  dnl  or
   837  dnl         shldl   %eax, %ebx
   838  dnl
   839  dnl  Immediate counts are always passed through unchanged.  For example,
   840  dnl
   841  dnl         shrdl(  $2, %esi, %edi)
   842  dnl  becomes
   843  dnl         shrdl   $2, %esi, %edi
   844  dnl
   845  dnl
   846  dnl  If you forget to use the macro form "shldl( ...)" and instead write
   847  dnl  just a plain "shldl ...", an error results.  This ensures the necessary
   848  dnl  variant treatment of %cl isn't accidentally bypassed.
   849  
   dnl  Called: define_shd_instruction(name)
   dnl
   dnl  Define `name' as a three-argument macro that forwards its own name and
   dnl  its three arguments to shd_instruction().  The parameters are written
   dnl  as $`'0, $`'1 etc so that they are left alone when
   dnl  define_shd_instruction itself expands and only become $0, $1 etc in the
   dnl  macro being generated.
   dnl
   dnl  NOTE(review): m4_instruction_wrapper() is defined outside this part of
   dnl  the file; presumably it is what turns a plain non-macro "shldl ..."
   dnl  into the error mentioned in the usage notes - confirm.
   850  define(define_shd_instruction,
   851  m4_assert_numargs(1)
   852  `define($1,
   853  m4_instruction_wrapper()
   854  m4_assert_numargs(3)
   855  `shd_instruction'(m4_doublequote($`'0),m4_doublequote($`'1),dnl
   856  m4_doublequote($`'2),m4_doublequote($`'3)))')
   857  
   858  dnl  Effectively: define(shldl,`shd_instruction(`$0',`$1',`$2',`$3')') etc
   dnl  One wrapper for each of the four double-shift mnemonics listed in the
   dnl  usage notes.
   859  define_shd_instruction(shldl)
   860  define_shd_instruction(shrdl)
   861  define_shd_instruction(shldw)
   862  define_shd_instruction(shrdw)
   863  
   864  dnl  Called: shd_instruction(op,count,src,dst)
   dnl  `op' is the mnemonic to emit.  When `count' is exactly %cl and
   dnl  WANT_SHLDL_CL is 0, the count operand is left out and the result is
   dnl  "op src, dst".  In every other case (an immediate count, or
   dnl  WANT_SHLDL_CL non-zero) all three operands are emitted unchanged.
   dnl  WANT_SHLDL_CL must already be defined, which m4_assert_defined checks.
   865  define(shd_instruction,
   866  m4_assert_numargs(4)
   867  m4_assert_defined(`WANT_SHLDL_CL')
   868  `ifelse(eval(m4_stringequal_p(`$2',`%cl') && !WANT_SHLDL_CL),1,
   869  ``$1'	`$3', `$4'',
   870  ``$1'	`$2', `$3', `$4'')')
   871  
   872  
   873  dnl  Usage: ASSERT([cond][,instructions])
   874  dnl
   875  dnl  If WANT_ASSERT is 1, output the given instructions and expect the given
   876  dnl  flags condition to then be satisfied.  For example,
   877  dnl
   878  dnl         ASSERT(ne, `cmpl %eax, %ebx')
   879  dnl
   880  dnl  The instructions can be omitted to just assert a flags condition with
   881  dnl  no extra calculation.  For example,
   882  dnl
   883  dnl         ASSERT(nc)
   884  dnl
   885  dnl  When `instructions' is not empty, a pushf/popf is added to preserve the
   886  dnl  flags, but the instructions themselves must preserve any registers that
   887  dnl  matter.  FRAME is adjusted for the push and pop, so the instructions
   888  dnl  given can use defframe() stack variables.
   889  dnl
   890  dnl  The condition can be omitted to just output the given instructions when
   891  dnl  assertion checking is wanted.  In this case the pushf/popf is omitted.
   892  dnl  For example,
   893  dnl
   894  dnl         ASSERT(, `movl %eax, VAR_KEEPVAL')
   895  
   dnl  Implementation notes:
   dnl
   dnl  Nothing is output unless WANT_ASSERT is 1 (and WANT_ASSERT must be
   dnl  defined).  With an empty condition only `instructions' are output.
   dnl  Otherwise the sequence is: optional pushf, the instructions, a
   dnl  "j<cond>" that skips a ud2 and lands on L(ASSERT_ok<n>), then optional
   dnl  popf.  <n> is the current ASSERT_counter, which is incremented
   dnl  afterwards so that every checked assertion gets its own label.
   dnl
   dnl  The pushf and popf are emitted only when `instructions' is non-empty,
   dnl  and the matching FRAME_pushl/FRAME_popl only when FRAME is defined.
   896  define(ASSERT,
   897  m4_assert_numargs_range(1,2)
   898  m4_assert_defined(`WANT_ASSERT')
   899  `ifelse(WANT_ASSERT,1,
   900  `ifelse(`$1',,
   901  	`$2',
   902  	`C ASSERT
   903  ifelse(`$2',,,`	pushf	ifdef(`FRAME',`FRAME_pushl()')')
   904  	$2
   905  	j`$1'	L(ASSERT_ok`'ASSERT_counter)
   906  	ud2	C assertion failed
   907  L(ASSERT_ok`'ASSERT_counter):
   908  ifelse(`$2',,,`	popf	ifdef(`FRAME',`FRAME_popl()')')
   909  define(`ASSERT_counter',incr(ASSERT_counter))')')')
   910  
   dnl  Sequence number appended to the L(ASSERT_ok...) label; starts at 1 and
   dnl  is bumped by each ASSERT that has a condition.
   911  define(ASSERT_counter,1)
   912  
   913  
   914  dnl  Usage: movl_text_address(label,register)
   915  dnl
   916  dnl  Get the address of a text segment label, using either a plain movl or a
   917  dnl  position-independent calculation, as necessary.  For example,
   918  dnl
   919  dnl         movl_text_address(L(foo),%eax)
   920  dnl
   921  dnl  This macro is only meant for use in ASSERT()s or when testing, since
   922  dnl  the PIC sequence it generates will want to be done with a ret balancing
   923  dnl  the call on CPUs with return address branch prediction.
   924  dnl
   925  dnl  The addl generated here has a backward reference to the label, and so
   926  dnl  won't suffer from the two forwards references bug in old gas (described
   927  dnl  in mpn/x86/README).
   928  
   dnl  Implementation notes:
   dnl
   dnl  PIC defined: a call to the immediately following label pushes that
   dnl  label's address, popl fetches it into `register', and the addl adds
   dnl  the difference between the wanted label and that local label.  The
   dnl  local label is L(movl_text_address_<n>), where <n> is
   dnl  movl_text_address_counter, incremented after each PIC expansion.
   dnl
   dnl  PIC not defined: a single movl of the label as an immediate.
   929  define(movl_text_address,
   930  m4_assert_numargs(2)
   931  `ifdef(`PIC',
   932  	`call	L(movl_text_address_`'movl_text_address_counter)
   933  L(movl_text_address_`'movl_text_address_counter):
   934  	popl	$2	C %eip
   935  	addl	`$'$1-L(movl_text_address_`'movl_text_address_counter), $2
   936  define(`movl_text_address_counter',incr(movl_text_address_counter))',
   937  	`movl	`$'$1, $2')')
   938  
   dnl  Sequence number for the local labels generated by movl_text_address;
   dnl  starts at 1.
   939  define(movl_text_address_counter,1)
   940  
   941  
   942  dnl  Usage: notl_or_xorl_GMP_NUMB_MASK(reg)
   943  dnl
   944  dnl  Expand to either "notl `reg'" or "xorl $GMP_NUMB_MASK,`reg'" as
   945  dnl  appropriate for nails in use or not.
   946  
   dnl  GMP_NAIL_BITS equal to 0 selects the notl; any other value selects an
   dnl  xorl with GMP_NUMB_MASK as the immediate operand.
   947  define(notl_or_xorl_GMP_NUMB_MASK,
   948  m4_assert_numargs(1)
   949  `ifelse(GMP_NAIL_BITS,0,
   950  `notl	`$1'',
   951  `xorl	$GMP_NUMB_MASK, `$1'')')
   952  
   953  
   954  dnl  Usage: LEA(symbol,reg)
   955  dnl  Usage: LEAL(symbol_local_to_file,reg)
   956  
   dnl  LEA: put the address of `symbol' in `reg'.
   dnl
   dnl  PIC defined: call the helper L(movl_eip_<reg>), which copies the
   dnl  return address from (%esp) into `reg'; add $_GLOBAL_OFFSET_TABLE_ to
   dnl  it; then load the address from the symbol's GOT entry (symbol@GOT).
   dnl  <reg> is `reg' with its first character (the %) removed.  The helper's
   dnl  text is appended to load_eip, but only if `reg' does not already occur
   dnl  in load_eip, so each register gets one helper however many times it is
   dnl  used.  The accumulated helpers are output by ASM_END.
   dnl
   dnl  PIC not defined: a single "movl $symbol, reg".
   957  define(`LEA',
   958  m4_assert_numargs(2)
   959  `ifdef(`PIC',`dnl
   960  ifelse(index(defn(`load_eip'), `$2'),-1,
   961  `m4append(`load_eip',
   962  `	TEXT
   963  	ALIGN(16)
   964  L(movl_eip_`'substr($2,1)):
   965  	movl	(%esp), $2
   966  	ret_internal
   967  ')')dnl
   968  	call	L(movl_eip_`'substr($2,1))
   969  	addl	$_GLOBAL_OFFSET_TABLE_, $2
   970  	movl	$1@GOT($2), $2
   971  ',`
   972  	movl	`$'$1, $2
   973  ')')
   974  
   dnl  LEAL: put the address of `symbol_local_to_file' in `reg'.
   dnl
   dnl  PIC defined: same helper call and $_GLOBAL_OFFSET_TABLE_ addition as
   dnl  LEA (including the once-per-register helper appended to load_eip), but
   dnl  the address is then formed directly with "leal symbol@GOTOFF(reg), reg"
   dnl  rather than being loaded from a GOT entry.
   dnl
   dnl  PIC not defined: a single "movl $symbol, reg".
   975  define(`LEAL',
   976  m4_assert_numargs(2)
   977  `ifdef(`PIC',`dnl
   978  ifelse(index(defn(`load_eip'), `$2'),-1,
   979  `m4append(`load_eip',
   980  `	TEXT
   981  	ALIGN(16)
   982  L(movl_eip_`'substr($2,1)):
   983  	movl	(%esp), $2
   984  	ret_internal
   985  ')')dnl
   986  	call	L(movl_eip_`'substr($2,1))
   987  	addl	$_GLOBAL_OFFSET_TABLE_, $2
   988  	leal	$1@GOTOFF($2), $2
   989  ',`
   990  	movl	`$'$1, $2
   991  ')')
   992  
   993  dnl ASM_END
   994  
   dnl  ASM_END expands to load_eip, ie. to whatever L(movl_eip_<reg>) helper
   dnl  code LEA and LEAL have accumulated by the point it is used.
   995  define(`ASM_END',`load_eip')
   996  
   dnl  load_eip starts out empty; LEA and LEAL append to it when PIC is
   dnl  defined.
   997  define(`load_eip', `')		dnl updated in LEA/LEAL
   998  
   999  
  dnl  Usage: DEF_OBJECT(name[,align])
  dnl
  dnl  Start a data object: emit RODATA, then ALIGN(align), then the label
  dnl  "name:".  When only one argument is given the alignment used is 2.
  1000  define(`DEF_OBJECT',
  1001  m4_assert_numargs_range(1,2)
  1002  	`RODATA
  1003  	ALIGN(ifelse($#,1,2,$2))
  1004  $1:
  1005  ')
  1006  
  dnl  Usage: END_OBJECT(name)
  dnl
  dnl  Emit SIZE(name,.-name), ie. give `name' a size equal to the distance
  dnl  from its label to the current location.
  1007  define(`END_OBJECT',
  1008  m4_assert_numargs(1)
  1009  `	SIZE(`$1',.-`$1')')
  1010  
  1011  dnl  Usage: CALL(funcname)
  1012  dnl
  1013  
  dnl  Emit a call to funcname prefixed with GSYM_PREFIX.  When PIC is
  dnl  defined the target is suffixed with @PLT; otherwise the call is
  dnl  direct.
  1014  define(`CALL',
  1015  m4_assert_numargs(1)
  1016  `ifdef(`PIC',
  1017    `call	GSYM_PREFIX`'$1@PLT',
  1018    `call	GSYM_PREFIX`'$1')')
  1019  
  dnl  PIC_WITH_EBX is defined (with an empty value) when PIC is defined, and
  dnl  is explicitly undefined otherwise.
  1020  ifdef(`PIC',
  1021  `define(`PIC_WITH_EBX')',
  1022  `undefine(`PIC_WITH_EBX')')
  1023  
  dnl  End of the definitions: switch output back on, undoing the divert(-1)
  dnl  at the top of the file.
  1024  divert`'dnl