github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/p6/aors_n.asm (about)

     1  dnl  Intel P6 mpn_add_n/mpn_sub_n -- mpn add or subtract.
     2  
     3  dnl  Copyright 2006 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  C TODO:
    34  C  * Avoid indexed addressing, it makes us stall on the two-ported register
    35  C    file.
    36  
    37  C			    cycles/limb
    38  C P6 model 0-8,10-12		3.17
    39  C P6 model 9   (Banias)		2.15
    40  C P6 model 13  (Dothan)		2.25
    41  
    42  
    43  define(`rp',	`%edi')		C destination limb pointer, stores go through it
    44  define(`up',	`%esi')		C first source limb pointer
    45  define(`vp',	`%ebx')		C second source limb pointer, the ADCSBB memory operand
    46  define(`n',	`%ecx')		C limb count on entry, then a negative index counting up to 0
    47  C Operation selection: ADCSBB is the carry-chaining instruction, func and
        C func_nc are the two entry point names.  Presumably the build defines
        C exactly one of OPERATION_add_n or OPERATION_sub_n -- confirm; if neither
        C is defined then ADCSBB, func and func_nc are left undefined here.
    48  ifdef(`OPERATION_add_n', `
    49  	define(ADCSBB,	      adc)
    50  	define(func,	      mpn_add_n)
    51  	define(func_nc,	      mpn_add_nc)')
    52  ifdef(`OPERATION_sub_n', `
    53  	define(ADCSBB,	      sbb)
    54  	define(func,	      mpn_sub_n)
    55  	define(func_nc,	      mpn_sub_nc)')
    56  
    57  MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
    58  
    59  ASM_START()
    60  
    61  	TEXT
    62  	ALIGN(16)
    63  
    64  PROLOGUE(func)
        C func(rp, up, vp, n): for each of the n limbs, lowest first, load a limb
        C from up, apply ADCSBB with the matching limb from vp, and store the
        C result at rp.  The carry flag is chained from limb to limb.  Limbs are
        C 4 bytes wide (all indexing is scaled by 4).
        C
        C In:     stack arguments at entry: 4(%esp) = rp, 8(%esp) = up,
        C         12(%esp) = vp, 16(%esp) = n.  They are read below at offsets
        C         16 .. 28 because three registers have been pushed by then.
        C Out:    %eax = carry or borrow out of the last limb, 0 or 1.
        C Saved:  %edi, %esi, %ebx are pushed on entry and popped at L(end).
        C Clobb:  %eax, %ecx, %edx, flags.
        C
        C n must be at least 1.  With n = 0 the entry computation below yields
        C L(ent) with a zero index, so eight limbs at the operand pointers would
        C be processed and the loop would not terminate at jecxz.
        C
        C The loop is unrolled 8 ways.  The first pass is entered part way into
        C the unrolled body through a computed jump so that n need not be a
        C multiple of 8.
    65  	xor	%edx, %edx			C this entry point has no carry-in
    66  L(start):					C func_nc joins here with its carry-in in %edx
    67  	push	%edi
    68  	push	%esi
    69  	push	%ebx
    70  	C The three pushes above moved the arguments 12 bytes further from %esp.
    71  	mov	16(%esp), rp
    72  	mov	20(%esp), up
    73  	mov	24(%esp), vp
    74  	mov	28(%esp), n
    75  	C Advance each pointer past its last limb; limbs are then addressed with a negative index.
    76  	lea	(up,n,4), up
    77  	lea	(vp,n,4), vp
    78  	lea	(rp,n,4), rp
    79  	C Split -n into a loop index (multiple of 8) and a count of groups to skip on the first pass.
    80  	neg	n
    81  	mov	n, %eax
    82  	and	$-8, n				C index = -(n rounded up to a multiple of 8)
    83  	and	$7, %eax			C eax = (-n) mod 8 = leading groups to skip
    84  	shl	$2, %eax			C 4x
    85  ifdef(`PIC',`
    86  	call	L(pic_calc)			C eax = address of L(ent) + 12 * skipped groups
    87  L(here):
    88  ',`
    89  	lea	L(ent) (%eax,%eax,2), %eax	C 12x: eax = L(ent) + 12 * skipped groups
    90  ')
    91  	C The jump target is only a group boundary if every limb group below assembles to exactly 12 bytes.
    92  	shr	%edx				C CF = bit 0 of the carry-in; jmp and mov leave it alone
    93  	jmp	*%eax
    94  
    95  ifdef(`PIC',`
    96  L(pic_calc):
    97  	C See mpn/x86/README about old gas bugs
    98  	lea	(%eax,%eax,2), %eax		C 3 * 4x = 12 * skipped groups
    99  	add	$L(ent)-L(here), %eax		C plus distance from L(here) to L(ent)
   100  	add	(%esp), %eax			C plus the return address pushed by call, which is L(here)
   101  	ret_internal				C NOTE(review): macro defined elsewhere, presumably a plain ret -- confirm
   102  ')
   103  
   104  L(end):
   105  	sbb	%eax, %eax			C eax = 0 - CF, that is 0 or -1
   106  	neg	%eax				C eax = 0 or 1, the returned carry
   107  	pop	%ebx
   108  	pop	%esi
   109  	pop	%edi
   110  	ret
   111  
   112  	ALIGN(16)
   113  L(top):
   114  	jecxz	L(end)				C index reached 0, all limbs done; flags are not touched
   115  L(ent):
   116  Zdisp(	mov,	0,(up,n,4), %eax)
   117  Zdisp(	ADCSBB,	0,(vp,n,4), %eax)
   118  Zdisp(	mov,	%eax, 0,(rp,n,4))
   119  	C NOTE(review): Zdisp is defined elsewhere; presumably it forces an explicit zero displacement so the group above is as long as the others -- confirm
   120  	mov	4(up,n,4), %edx
   121  	ADCSBB	4(vp,n,4), %edx
   122  	mov	%edx, 4(rp,n,4)
   123  	C Groups alternate between %eax and %edx as the scratch register.
   124  	mov	8(up,n,4), %eax
   125  	ADCSBB	8(vp,n,4), %eax
   126  	mov	%eax, 8(rp,n,4)
   127  
   128  	mov	12(up,n,4), %edx
   129  	ADCSBB	12(vp,n,4), %edx
   130  	mov	%edx, 12(rp,n,4)
   131  
   132  	mov	16(up,n,4), %eax
   133  	ADCSBB	16(vp,n,4), %eax
   134  	mov	%eax, 16(rp,n,4)
   135  
   136  	mov	20(up,n,4), %edx
   137  	ADCSBB	20(vp,n,4), %edx
   138  	mov	%edx, 20(rp,n,4)
   139  
   140  	mov	24(up,n,4), %eax
   141  	ADCSBB	24(vp,n,4), %eax
   142  	mov	%eax, 24(rp,n,4)
   143  
   144  	mov	28(up,n,4), %edx
   145  	ADCSBB	28(vp,n,4), %edx
   146  	mov	%edx, 28(rp,n,4)
   147  	C Step the index with lea, not add, so CF from the last ADCSBB carries into the next pass.
   148  	lea	8(n), n
   149  	jmp	L(top)
   150  
   151  EPILOGUE()
   152  
   153  PROLOGUE(func_nc)
        C func_nc: same operation as func, but with a carry-in (or borrow-in)
        C passed as a fifth stack argument.  Only bit 0 of that argument is used:
        C the shared code shifts it into the carry flag with shr before the first
        C limb is processed.  The return value and register usage are those of
        C the shared code starting at L(start).
   154  	movl	20(%esp), %edx		C nothing pushed yet, so 20(%esp) is the fifth argument
   155  	jmp	L(start)		C join func after its xor, keeping %edx as the carry-in
   156  EPILOGUE()