github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/pentium/logops_n.asm (about)

     1  dnl  Intel Pentium mpn_and_n,...,mpn_xnor_n -- bitwise logical operations.
     2  
     3  dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  
    34  C P5: 3.0 c/l  and, ior, xor
    35  C     3.5 c/l  andn, iorn, nand, nior, xnor
    36  
    37  
        dnl  M4_choose_op(name, want_post, op, want_pre)
        dnl
        dnl  Selects the operation this build of the file implements.  When the
        dnl  symbol OPERATION_<name> is defined, the call sets:
        dnl
        dnl    M4_function   to mpn_<name>, used as the entry point name
        dnl    M4op          to the third argument, the two-operand instruction
        dnl    M4_want_post  to the second argument
        dnl    M4_want_pre   to the fourth argument
        dnl
        dnl  Note the argument order: the post flag comes BEFORE the instruction
        dnl  and the pre flag comes after it.  When OPERATION_<name> is not defined
        dnl  the call defines nothing.
    38  define(M4_choose_op,
    39  `ifdef(`OPERATION_$1',`
    40  define(`M4_function', `mpn_$1')
    41  define(`M4_want_pre', `$4')
    42  define(`M4op',        `$3')
    43  define(`M4_want_post',`$2')
    44  ')')
        dnl  M4pre(text) and M4post(text) expand to text only when the matching
        dnl  flag is exactly yes, and to nothing otherwise.  In the code below the
        dnl  pre text complements the limbs loaded from yp before M4op is applied,
        dnl  and the post text complements the result of M4op before it is stored.
    45  define(M4pre, `ifelse(M4_want_pre, yes,`$1')')
    46  define(M4post,`ifelse(M4_want_post,yes,`$1')')
    47  
    48  M4_choose_op( and_n,     , andl,    )	C w = x & y
    49  M4_choose_op( andn_n,    , andl, yes)	C w = x & ~y     (pre: complement y)
    50  M4_choose_op( nand_n, yes, andl,    )	C w = ~(x & y)   (post: complement result)
    51  M4_choose_op( ior_n,     ,  orl,    )	C w = x | y
    52  M4_choose_op( iorn_n,    ,  orl, yes)	C w = x | ~y     (pre: complement y)
    53  M4_choose_op( nior_n, yes,  orl,    )	C w = ~(x | y)   (post: complement result)
    54  M4_choose_op( xor_n,     , xorl,    )	C w = x ^ y
    55  M4_choose_op( xnor_n, yes, xorl,    )	C w = ~(x ^ y)   (post: complement result)
    56  
        C In the table above, complement means xor with GMP_NUMB_MASK, as done by
        C the pre and post instructions in the function body.
        C
        C If none of the OPERATION symbols tested above was defined, M4_function
        C is still undefined here, so report that through m4_error.
    57  ifdef(`M4_function',,
    58  `m4_error(`Unrecognised or undefined OPERATION symbol
    59  ')')
    60  
        C NOTE(review): the two macros used below are defined outside this file.
        C They appear to declare the entry points this file can provide and the
        C range of nail bits it supports -- confirm against their definitions
        C before editing either line.
    61  MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
    62  
    63  NAILS_SUPPORT(0-31)
    64  
    65  
    66  C void M4_function (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size);
    67  C
    68  C Nothing complicated here, just some care to avoid data cache bank clashes
    69  C and AGIs.
    70  C
    71  C We're one register short of being able to do a simple 4 loads, 2 ops, 2
    72  C stores.  Instead %ebp is juggled a bit and nops are introduced to keep the
    73  C pairings as intended.  An in-place operation would free up a register, for
    74  C a 0.5 c/l speedup, if that's worth bothering with.
    75  C
    76  C This code seems best for P55 too.  Data alignment is a big problem for MMX
    77  C and the pairing restrictions on movq and integer instructions make life
    78  C difficult.
    79  
    80  defframe(PARAM_SIZE,16)	C mp_size_t size, number of limbs
    81  defframe(PARAM_YP,  12)	C mp_srcptr yp, second source
    82  defframe(PARAM_XP,   8)	C mp_srcptr xp, first source
    83  defframe(PARAM_WP,   4)	C mp_ptr wp, destination
    84  
    85  	TEXT
    86  	ALIGN(8)
    87  
        C M4_function (wp, xp, yp, size)
        C
        C Stores to wp[i] the selected logical operation of xp[i] and yp[i] for
        C each of the size limbs, working from the highest limb down to limb 0.
        C Limbs are handled two at a time; an odd size is dealt with by doing the
        C top limb on its own before entering the pair loop.
        C
        C Registers ebx, esi, edi and ebp are pushed on entry and popped at
        C L(done).  eax, ecx and edx are overwritten.  Nothing is written to eax
        C as a return value.
        C
        C NOTE(review): a size of 0 is not handled.  It would reach L(entry) with
        C ecx = 0 and the decl there would step the counter past zero.  Callers
        C presumably guarantee size >= 1 -- confirm.
    88  PROLOGUE(M4_function)
    89  deflit(`FRAME',0)
    90  
        C NOTE(review): FRAME_pushl is defined elsewhere; presumably it tracks the
        C pushes so the PARAM_ offsets remain correct -- confirm.
    91  	pushl	%ebx	FRAME_pushl()
    92  	pushl	%esi	FRAME_pushl()
    93  
    94  	pushl	%edi	FRAME_pushl()
    95  	pushl	%ebp	FRAME_pushl()
    96  
    97  	movl	PARAM_SIZE, %ecx	C ecx = size
    98  	movl	PARAM_XP, %ebx	C ebx = xp
    99  
   100  	movl	PARAM_YP, %esi	C esi = yp
   101  	movl	PARAM_WP, %edi	C edi = wp
   102  
   103  	shrl	%ecx	C ecx = size/2 limb pairs, carry set if size is odd
   104  	jnc	L(entry)	C even size, go straight to the pair loop
   105  
        C Odd size: process the top limb alone.  With 4-byte limbs, byte offset
        C ecx*8 is limb index 2*ecx, which is size-1 here.
   106  	movl	(%ebx,%ecx,8), %eax	C risk of data cache bank clash here
   107  	movl	(%esi,%ecx,8), %edx	C edx = top limb of yp
   108  
   109  M4pre(`	notl_or_xorl_GMP_NUMB_MASK(%edx)')	C pre: presumably complements the y limb (macro defined elsewhere)
   110  
   111  	M4op	%edx, %eax	C eax = x OP y for the top limb
   112  
   113  M4post(`xorl	$GMP_NUMB_MASK, %eax')	C post: complement the result
   114  	orl	%ecx, %ecx	C set ZF if there are no limb pairs left
   115  
   116  	movl	%eax, (%edi,%ecx,8)	C store top limb; movl leaves ZF untouched
   117  	jz	L(done)	C size was 1, nothing more to do
   118  
   119  	jmp	L(entry)
   120  
   121  
   122  L(top):
   123  	C eax	high limb result of the current pair, before any post complement
   124  	C ebx	xp
   125  	C ecx	counter, limb pairs, decrementing
   126  	C edx	low limb of y for the current pair, after any pre complement
   127  	C esi	yp
   128  	C edi	wp
   129  	C ebp	low limb of x for the current pair
   130  
   131  	M4op	%ebp, %edx	C edx = low limb result
   132  	nop	C filler, keeps the instruction pairing as intended
   133  
   134  M4post(`xorl	$GMP_NUMB_MASK, %eax')	C post: complement high limb result
   135  M4post(`xorl	$GMP_NUMB_MASK, %edx')	C post: complement low limb result
   136  
        C ecx was already decremented, so offsets 4 and 0 address the same pair
        C that was loaded at offsets -4 and -8 below.
   137  	movl	%eax, 4(%edi,%ecx,8)
   138  	movl	%edx, (%edi,%ecx,8)
   139  
   140  L(entry):
   141  	movl	-4(%ebx,%ecx,8), %ebp	C ebp = high limb of x for pair ecx-1
   142  	nop	C filler, keeps the instruction pairing as intended
   143  
   144  	movl	-4(%esi,%ecx,8), %eax	C eax = high limb of y
   145  	movl	-8(%esi,%ecx,8), %edx	C edx = low limb of y
   146  
   147  M4pre(`	xorl	$GMP_NUMB_MASK, %eax')	C pre: complement high limb of y
   148  M4pre(`	xorl	$GMP_NUMB_MASK, %edx')	C pre: complement low limb of y
   149  
   150  	M4op	%ebp, %eax	C eax = high limb result
   151  	movl	-8(%ebx,%ecx,8), %ebp	C ebp reused: now the low limb of x
   152  
   153  	decl	%ecx
   154  	jnz	L(top)	C more pairs remain; L(top) finishes this one first
   155  
   156  
        C Last pair: same finishing sequence as at L(top).  ecx is 0 here, so the
        C stores go to wp[1] and wp[0].
   157  	M4op	%ebp, %edx	C edx = low limb result
   158  	nop
   159  
   160  M4post(`xorl	$GMP_NUMB_MASK, %eax')
   161  M4post(`xorl	$GMP_NUMB_MASK, %edx')
   162  
   163  	movl	%eax, 4(%edi,%ecx,8)
   164  	movl	%edx, (%edi,%ecx,8)
   165  
   166  
   167  L(done):
        C Restore the saved registers in the reverse of the order they were pushed.
   168  	popl	%ebp
   169  	popl	%edi
   170  
   171  	popl	%esi
   172  	popl	%ebx
   173  
   174  	ret
   175  
   176  EPILOGUE()