github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/aors_err2_n.asm (about)

     1  dnl  AMD64 mpn_add_err2_n, mpn_sub_err2_n
     2  
     3  dnl  Contributed by David Harvey.
     4  
     5  dnl  Copyright 2011 Free Software Foundation, Inc.
     6  
     7  dnl  This file is part of the GNU MP Library.
     8  dnl
     9  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
    10  dnl  it under the terms of either:
    11  dnl
    12  dnl    * the GNU Lesser General Public License as published by the Free
    13  dnl      Software Foundation; either version 3 of the License, or (at your
    14  dnl      option) any later version.
    15  dnl
    16  dnl  or
    17  dnl
    18  dnl    * the GNU General Public License as published by the Free Software
    19  dnl      Foundation; either version 2 of the License, or (at your option) any
    20  dnl      later version.
    21  dnl
    22  dnl  or both in parallel, as here.
    23  dnl
    24  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    25  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    26  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    27  dnl  for more details.
    28  dnl
    29  dnl  You should have received copies of the GNU General Public License and the
    30  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    31  dnl  see https://www.gnu.org/licenses/.
    32  
    33  include(`../config.m4')
    34  
    35  C	     cycles/limb
    36  C AMD K8,K9	 4.5
    37  C AMD K10	 ?
    38  C Intel P4	 ?
    39  C Intel core2	 6.9
    40  C Intel corei	 ?
    41  C Intel atom	 ?
    42  C VIA nano	 ?
    43  
    44  
    45  C INPUT PARAMETERS (the first six are the System V AMD64 integer argument registers, in order)
    46  define(`rp',	`%rdi')	C destination limbs: each sum/difference limb is stored through rp
    47  define(`up',	`%rsi')	C first source operand limbs
    48  define(`vp',	`%rdx')	C second source operand limbs (the memory operand of ADCSBB)
    49  define(`ep',	`%rcx')	C output area: four limbs are stored at 0, 8, 16 and 24 bytes from ep
    50  define(`yp1',	`%r8')	C limb array read from its last limb downwards; feeds e1h:e1l
    51  define(`yp2',   `%r9')	C second such array; feeds e2h:e2l (turned into an offset from yp1 on entry)
    52  define(`n_param',     `8(%rsp)')	C first stack argument (limb count); offset valid only before any push
    53  define(`cy_param',    `16(%rsp)')	C second stack argument (carry-in); offset valid only before any push
    54  	C Carry state.  cy2 must stay in %rax: the function ends with and $1, %eax to form the return value.
    55  define(`cy1',   `%r14')	C 0 / all-ones mask from the first limb of each pair
    56  define(`cy2',   `%rax')	C carry-in at entry, then 0 / all-ones mask from the second limb of each pair
    57  	C Limb count; negated in the function and used as an index that counts up to zero.
    58  define(`n',     `%r10')
    59  	C Scratch limb and the two two-limb accumulators (low limb, high limb).
    60  define(`w',     `%rbx')	C scratch: current limb being added/subtracted or masked
    61  define(`e1l',	`%rbp')	C low limb of the yp1 sum, stored to (ep)
    62  define(`e1h',	`%r11')	C high limb of the yp1 sum, stored to 8(ep)
    63  define(`e2l',	`%r12')	C low limb of the yp2 sum, stored to 16(ep)
    64  define(`e2h',	`%r13')	C high limb of the yp2 sum, stored to 24(ep)
    65  
    66  C Pick the carry-propagating instruction (ADCSBB) and the entry-point name (func) from whichever OPERATION_* symbol is defined; the symbols are not defined in this file.
    67  ifdef(`OPERATION_add_err2_n', `
    68  	define(ADCSBB,	      adc)
    69  	define(func,	      mpn_add_err2_n)')
    70  ifdef(`OPERATION_sub_err2_n', `
    71  	define(ADCSBB,	      sbb)
    72  	define(func,	      mpn_sub_err2_n)')
    73  C NOTE(review): MULFUNC_PROLOGUE is defined outside this file; presumably it declares the entry points this source can be built as - confirm.
    74  MULFUNC_PROLOGUE(mpn_add_err2_n mpn_sub_err2_n)
    75  C func(rp, up, vp, ep, yp1, yp2, n, cy): rp[i] = up[i] ADCSBB vp[i] for i = 0 .. n-1, with carry-in taken from bit 0 of cy; returns the final carry/borrow (0 or 1) in %rax.
    76  C Each limb i that produces a carry-out adds yp1[n-1-i] into the two-limb sum stored at ep[0..1], and yp2[n-1-i] into the two-limb sum stored at ep[2..3].
    77  ASM_START()
    78  	TEXT
    79  	ALIGN(16)
    80  PROLOGUE(func)
    81  	mov	cy_param, cy2	C carry-in; stack args are read before the pushes below move %rsp
    82  	mov	n_param, n	C limb count
    83  	C Save the callee-saved registers used below as w, e1l, e2l, e2h and cy1.
    84  	push	%rbx
    85  	push	%rbp
    86  	push	%r12
    87  	push	%r13
    88  	push	%r14
    89  	C Clear both two-limb sums, e1h:e1l and e2h:e2l (R32 is defined outside this file; presumably it yields the 32-bit register name).
    90  	xor	R32(e1l), R32(e1l)
    91  	xor	R32(e1h), R32(e1h)
    92  	xor	R32(e2l), R32(e2l)
    93  	xor	R32(e2h), R32(e2h)
    94  	C yp2 := yp2 - yp1, so that (yp1,yp2) addresses the yp2 limb with the same index as (yp1).
    95  	sub	yp1, yp2
    96  	C Point rp, up, vp one past their last limb; with n negated below, (ptr,n,8) walks forward as n rises to 0.
    97  	lea	(rp,n,8), rp
    98  	lea	(up,n,8), up
    99  	lea	(vp,n,8), vp
   100  	C An odd n peels one limb at L(odd) so that the main loop can handle limbs in pairs.
   101  	test	$1, n
   102  	jnz	L(odd)
   103  	C Even n: yp1 -> limb n-1 (the yp arrays are walked backwards).  n = 0 is not special-cased; the loop body would still run.
   104  	lea	-8(yp1,n,8), yp1
   105  	neg	n
   106  	jmp	L(top)
   107  
   108  	ALIGN(16)
   109  L(odd):
   110  	lea	-16(yp1,n,8), yp1	C yp1 -> limb n-2, so 8(yp1) is limb n-1
   111  	neg	n
   112  	shr	$1, cy2	C carry-in: bit 0 of cy2 -> CF
   113  	mov	(up,n,8), w
   114  	ADCSBB	(vp,n,8), w
   115  	cmovc	8(yp1), e1l	C on carry-out, e1l = yp1 limb n-1 (the sum was zero, so a move equals an add)
   116  	cmovc	8(yp1,yp2), e2l	C on carry-out, e2l = yp2 limb n-1
   117  	mov	w, (rp,n,8)
   118  	sbb	cy2, cy2	C cy2 = 0 or all ones from CF, the form L(top) and L(end) expect
   119  	inc	n
   120  	jz	L(end)	C n was 1: no limb pairs remain
   121  	C Main loop: two limbs per iteration.  cy2 carries the carry flag from one iteration to the next.
   122  	ALIGN(16)
   123  L(top):
   124  	mov	(up,n,8), w
   125  	shr	$1, cy2		C restore carry
   126  	ADCSBB	(vp,n,8), w
   127  	mov	w, (rp,n,8)
   128  	sbb	cy1, cy1	C generate mask, preserve CF
   129  	C Second limb of the pair; CF still holds the carry-out of the first limb.
   130  	mov	8(up,n,8), w
   131  	ADCSBB	8(vp,n,8), w
   132  	mov	w, 8(rp,n,8)
   133  	sbb	cy2, cy2	C generate mask, preserve CF
   134  	C Accumulate the masked yp limbs.  The add/adc below overwrite CF, which is why the carry was captured in cy2.
   135  	mov	(yp1), w	C (e1h:e1l) += cy1 * yp1 limb
   136  	and	cy1, w
   137  	add	w, e1l
   138  	adc	$0, e1h
   139  
   140  	and	(yp1,yp2), cy1	C (e2h:e2l) += cy1 * yp2 limb
   141  	add	cy1, e2l
   142  	adc	$0, e2h
   143  
   144  	mov	-8(yp1), w	C (e1h:e1l) += cy2 * next yp1 limb
   145  	and	cy2, w
   146  	add	w, e1l
   147  	adc	$0, e1h
   148  
   149  	mov	-8(yp1,yp2), w	C (e2h:e2l) += cy2 * next yp2 limb
   150  	and	cy2, w
   151  	add	w, e2l
   152  	adc	$0, e2h
   153  
   154  	add	$2, n
   155  	lea	-16(yp1), yp1	C step yp1 back two limbs; lea leaves the ZF set by add for the jnz
   156  	jnz	L(top)
   157  L(end):
   158  	C Store both sums, low limb first: ep[0..1] = e1h:e1l, ep[2..3] = e2h:e2l.
   159  	mov	e1l, (ep)
   160  	mov	e1h, 8(ep)
   161  	mov	e2l, 16(ep)
   162  	mov	e2h, 24(ep)
   163  
   164  	and	$1, %eax	C return carry
   165  	C Restore the callee-saved registers in reverse push order.
   166  	pop	%r14
   167  	pop	%r13
   168  	pop	%r12
   169  	pop	%rbp
   170  	pop	%rbx
   171  	ret
   172  EPILOGUE()