github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/aors_err1_n.asm (about)

     1  dnl  AMD64 mpn_add_err1_n, mpn_sub_err1_n
     2  
     3  dnl  Contributed by David Harvey.
     4  
     5  dnl  Copyright 2011 Free Software Foundation, Inc.
     6  
     7  dnl  This file is part of the GNU MP Library.
     8  dnl
     9  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
    10  dnl  it under the terms of either:
    11  dnl
    12  dnl    * the GNU Lesser General Public License as published by the Free
    13  dnl      Software Foundation; either version 3 of the License, or (at your
    14  dnl      option) any later version.
    15  dnl
    16  dnl  or
    17  dnl
    18  dnl    * the GNU General Public License as published by the Free Software
    19  dnl      Foundation; either version 2 of the License, or (at your option) any
    20  dnl      later version.
    21  dnl
    22  dnl  or both in parallel, as here.
    23  dnl
    24  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    25  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    26  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    27  dnl  for more details.
    28  dnl
    29  dnl  You should have received copies of the GNU General Public License and the
    30  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    31  dnl  see https://www.gnu.org/licenses/.
    32  
    33  include(`../config.m4')
    34  
    35  C	     cycles/limb
    36  C AMD K8,K9	 2.75 (degenerates to 3 c/l for some alignments)
    37  C AMD K10	 ?
    38  C Intel P4	 ?
    39  C Intel core2	 ?
    40  C Intel corei	 ?
    41  C Intel atom	 ?
    42  C VIA nano	 ?
    43  
    44  
    45  C INPUT PARAMETERS
    46  define(`rp',	`%rdi')	C result limbs are stored through rp
    47  define(`up',	`%rsi')	C first source operand
    48  define(`vp',	`%rdx')	C second source operand, combined with up by ADCSBB
    49  define(`ep',	`%rcx')	C receives the two-limb sum el, eh at the end
    50  define(`yp',	`%r8')	C limbs conditionally summed, read from index n-1 downward
    51  define(`n',	`%r9')	C limb count
    52  define(`cy_param',	`8(%rsp)')	C incoming carry, read from the stack at function entry
    53  C Working registers
    54  define(`el',	`%rbx')	C low limb of the running sum
    55  define(`eh',	`%rbp')	C high limb of the running sum
    56  define(`t0',	`%r10')	C t0-t3: a yp limb or zero, depending on the carry
    57  define(`t1',	`%r11')
    58  define(`t2',	`%r12')
    59  define(`t3',	`%r13')
    60  define(`w0',	`%r14')	C w0, w1: limbs being added or subtracted
    61  define(`w1',	`%r15')
    62  
    63  ifdef(`OPERATION_add_err1_n', `
    64  	define(ADCSBB,	      adc)
    65  	define(func,	      mpn_add_err1_n)')	C add build: ADCSBB is adc
    66  ifdef(`OPERATION_sub_err1_n', `
    67  	define(ADCSBB,	      sbb)
    68  	define(func,	      mpn_sub_err1_n)')	C subtract build: ADCSBB is sbb
    69  C NOTE(review): MULFUNC_PROLOGUE is defined outside this file; it is handed both names this source can be built as
    70  MULFUNC_PROLOGUE(mpn_add_err1_n mpn_sub_err1_n)
    71  C func(rp, up, vp, ep, yp, n, cy): rp[i] = up[i] ADCSBB vp[i] for i = 0 .. n-1, with bit 0 of cy as the incoming carry.
    72  C Each limb i that produces a carry (borrow for sbb) adds yp[n-1-i] into the two-limb sum eh:el, stored to ep[0], ep[1] at the end; al returns the final carry.
    73  ASM_START()
    74  	TEXT
    75  	ALIGN(16)
    76  PROLOGUE(func)
    77  	mov	cy_param, %rax	C rax = cy; read now because the pushes below move rsp
    78  	C Save the registers backing el, eh, t2, t3, w0, w1; popped again after L(end)
    79  	push	%rbx
    80  	push	%rbp
    81  	push	%r12
    82  	push	%r13
    83  	push	%r14
    84  	push	%r15
    85  	C Advance up, vp, rp past their last limb so that the negated count n indexes them upward to zero
    86  	lea	(up,n,8), up
    87  	lea	(vp,n,8), vp
    88  	lea	(rp,n,8), rp
    89  	C Branch on n mod 4 (r10 is scratch here): 1, 2 or 3 lead-in limbs, then four limbs per pass in L(loop)
    90  	mov	R32(n), R32(%r10)
    91  	and	$3, R32(%r10)
    92  	jz	L(0mod4)
    93  	cmp	$2, R32(%r10)	C CF set: 1 mod 4, ZF set: 2 mod 4, neither: 3 mod 4
    94  	jc	L(1mod4)
    95  	jz	L(2mod4)
    96  L(3mod4):
    97  	xor	R32(el), R32(el)	C clear the sum eh:el and the deferred addends t0, t1
    98  	xor	R32(eh), R32(eh)
    99  	xor	R32(t0), R32(t0)
   100  	xor	R32(t1), R32(t1)
   101  	lea	-24(yp,n,8), yp	C yp = &yp[n-3]
   102  	neg	n	C n = -n; it runs up toward 0
   103  	C Three limbs; mov and cmovc do not write flags, so CF chains from one ADCSBB to the next
   104  	shr	$1, %al		   C restore carry
   105  	mov	(up,n,8), w0
   106  	mov	8(up,n,8), w1
   107  	ADCSBB	(vp,n,8), w0
   108  	mov	w0, (rp,n,8)
   109  	cmovc	16(yp), el	C el = yp[n-1] if limb 0 carried
   110  	ADCSBB	8(vp,n,8), w1
   111  	mov	w1, 8(rp,n,8)
   112  	cmovc	8(yp), t0	C t0 = yp[n-2] if limb 1 carried
   113  	mov	16(up,n,8), w0
   114  	ADCSBB	16(vp,n,8), w0
   115  	mov	w0, 16(rp,n,8)
   116  	cmovc	(yp), t1	C t1 = yp[n-3] if limb 2 carried
   117  	setc	%al		   C save carry
   118  	add	t0, el	C eh:el += t0, then += t1
   119  	adc	$0, eh
   120  	add	t1, el
   121  	adc	$0, eh
   122  	C Skip the loop when the lead-in consumed every limb
   123  	add	$3, n
   124  	jnz	L(loop)
   125  	jmp	L(end)
   126  
   127  	ALIGN(16)
   128  L(0mod4):
   129  	xor	R32(el), R32(el)
   130  	xor	R32(eh), R32(eh)
   131  	lea	(yp,n,8), yp	C yp = &yp[n]
   132  	neg	n
   133  	jmp	L(loop)	C NOTE(review): n == 0 also takes this path and enters the loop; presumably callers guarantee n >= 1
   134  
   135  	ALIGN(16)
   136  L(1mod4):
   137  	xor	R32(el), R32(el)
   138  	xor	R32(eh), R32(eh)
   139  	lea	-8(yp,n,8), yp	C yp = &yp[n-1]
   140  	neg	n
   141  
   142  	shr	$1, %al		   C restore carry
   143  	mov	(up,n,8), w0
   144  	ADCSBB	(vp,n,8), w0
   145  	mov	w0, (rp,n,8)
   146  	cmovc	(yp), el	C el = yp[n-1] if limb 0 carried
   147  	setc	%al		   C save carry
   148  
   149  	add	$1, n
   150  	jnz	L(loop)
   151  	jmp	L(end)
   152  
   153  	ALIGN(16)
   154  L(2mod4):
   155  	xor	R32(el), R32(el)
   156  	xor	R32(eh), R32(eh)
   157  	xor	R32(t0), R32(t0)
   158  	lea	-16(yp,n,8), yp	C yp = &yp[n-2]
   159  	neg	n
   160  
   161  	shr	$1, %al		   C restore carry
   162  	mov	(up,n,8), w0
   163  	mov	8(up,n,8), w1
   164  	ADCSBB	(vp,n,8), w0
   165  	mov	w0, (rp,n,8)
   166  	cmovc	8(yp), el	C el = yp[n-1] if limb 0 carried
   167  	ADCSBB	8(vp,n,8), w1
   168  	mov	w1, 8(rp,n,8)
   169  	cmovc	(yp), t0	C t0 = yp[n-2] if limb 1 carried
   170  	setc	%al		   C save carry
   171  	add	t0, el	C eh:el += t0
   172  	adc	$0, eh
   173  
   174  	add	$2, n
   175  	jnz	L(loop)
   176  	jmp	L(end)
   177  	C Main loop, four limbs per pass. CF is parked in al between passes because the adds into eh:el overwrite it.
   178  	ALIGN(32)
   179  L(loop):
   180  	shr	$1, %al		   C restore carry
   181  	mov	-8(yp), t0	C t0 = next yp limb down; t1 and t2 are loaded the same way below
   182  	mov	$0, R32(t3)	C t3 = 0 via mov, since xor would clobber the CF just restored
   183  	mov	(up,n,8), w0
   184  	mov	8(up,n,8), w1
   185  	ADCSBB	(vp,n,8), w0
   186  	cmovnc	t3, t0	C drop t0 to 0 if this limb did not carry
   187  	ADCSBB	8(vp,n,8), w1
   188  	mov	-16(yp), t1
   189  	mov	w0, (rp,n,8)
   190  	mov	16(up,n,8), w0
   191  	mov	w1, 8(rp,n,8)
   192  	cmovnc	t3, t1
   193  	mov	-24(yp), t2
   194  	ADCSBB	16(vp,n,8), w0
   195  	cmovnc	t3, t2
   196  	mov	24(up,n,8), w1
   197  	ADCSBB	24(vp,n,8), w1
   198  	cmovc	-32(yp), t3	C t3 = fourth yp limb only if the last limb carried
   199  	setc	%al		   C save carry
   200  	add	t0, el	C eh:el += t0 + t1 + t2 + t3, one addend at a time
   201  	adc	$0, eh
   202  	add	t1, el
   203  	adc	$0, eh
   204  	add	t2, el
   205  	adc	$0, eh
   206  	mov	w0, 16(rp,n,8)
   207  	add	t3, el
   208  	lea	-32(yp), yp	C step yp down four limbs; lea and mov keep CF for the adc that follows
   209  	adc	$0, eh
   210  	mov	w1, 24(rp,n,8)
   211  	add	$4, n	C n reaches 0 once all limbs are done
   212  	jnz	L(loop)
   213  
   214  L(end):
   215  	mov	el, (ep)	C ep[0] = el
   216  	mov	eh, 8(ep)	C ep[1] = eh
   217  	C Pop in reverse push order; al still holds the carry saved by the last setc
   218  	pop	%r15
   219  	pop	%r14
   220  	pop	%r13
   221  	pop	%r12
   222  	pop	%rbp
   223  	pop	%rbx
   224  	ret
   225  EPILOGUE()