github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/atom/aors_n.asm (about)

     1  dnl  Intel Atom mpn_add_n/mpn_sub_n -- rp[] = up[] +- vp[].
     2  
     3  dnl  Copyright 2011 Free Software Foundation, Inc.
     4  
     5  dnl  Contributed to the GNU project by Marco Bodrato.
     6  
     7  dnl  This file is part of the GNU MP Library.
     8  dnl
     9  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
    10  dnl  it under the terms of either:
    11  dnl
    12  dnl    * the GNU Lesser General Public License as published by the Free
    13  dnl      Software Foundation; either version 3 of the License, or (at your
    14  dnl      option) any later version.
    15  dnl
    16  dnl  or
    17  dnl
    18  dnl    * the GNU General Public License as published by the Free Software
    19  dnl      Foundation; either version 2 of the License, or (at your option) any
    20  dnl      later version.
    21  dnl
    22  dnl  or both in parallel, as here.
    23  dnl
    24  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    25  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    26  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    27  dnl  for more details.
    28  dnl
    29  dnl  You should have received copies of the GNU General Public License and the
    30  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    31  dnl  see https://www.gnu.org/licenses/.
    32  
    33  include(`../config.m4')
    34  
    35  C			    cycles/limb
    36  C P5
    37  C P6 model 0-8,10-12
    38  C P6 model 9  (Banias)
    39  C P6 model 13 (Dothan)
    40  C P4 model 0  (Willamette)
    41  C P4 model 1  (?)
    42  C P4 model 2  (Northwood)
    43  C P4 model 3  (Prescott)
    44  C P4 model 4  (Nocona)
    45  C Intel Atom			 3
    46  C AMD K6
    47  C AMD K7
    48  C AMD K8
    49  C AMD K10
    50  
    51  ifdef(`OPERATION_add_n', `
    52  	define(M4_inst,        adcl)	C add with carry
    53  	define(M4_function_n,  mpn_add_n)
    54  	define(M4_function_nc, mpn_add_nc)
    55  	define(M4_description, add)	C word used in the descriptive comment
    56  ',`ifdef(`OPERATION_sub_n', `
    57  	define(M4_inst,        sbbl)	C subtract with borrow
    58  	define(M4_function_n,  mpn_sub_n)
    59  	define(M4_function_nc, mpn_sub_nc)
    60  	define(M4_description, subtract)	C word used in the descriptive comment
    61  ',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
    62  ')')')
    63  
    64  MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
    65  
    66  C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
    67  C                         mp_size_t size);
    68  C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
    69  C	                   mp_size_t size, mp_limb_t carry);
    70  C
    71  C Calculate src1,size M4_description src2,size, and store the result in
    72  C dst,size.  The return value is the carry (or borrow, when subtracting)
    73  C out of the top limb of the result (1 or 0).
    74  C
    75  C The _nc version accepts 1 or 0 for an initial carry into the low limb of
    76  C the calculation.  Note values other than 1 or 0 here will lead to garbage
    77  C results.
    78  
    79  defframe(PARAM_CARRY,20)	C carry-in, read only by the _nc entry point
    80  defframe(PARAM_SIZE, 16)	C limb count
    81  defframe(PARAM_SRC2, 12)	C second source pointer
    82  defframe(PARAM_SRC1, 8)		C first source pointer
    83  defframe(PARAM_DST,  4)		C destination pointer
    84  C NOTE(review): the offsets follow the prototype argument order; confirm the exact addressing against defframe.
    85  dnl  Re-use parameter slots as register save areas.  Each slot is written only after its argument has been loaded.
    86  define(SAVE_RP,`PARAM_SIZE')	C holds the incoming edi while rp is in use
    87  define(SAVE_VP,`PARAM_SRC1')	C holds the incoming ebx while vp is in use
    88  define(SAVE_UP,`PARAM_DST')	C holds the incoming esi while up is in use
    89  
    90  define(`rp',  `%edi')	C destination pointer, saved in SAVE_RP and restored before ret
    91  define(`up',  `%esi')	C first source pointer, saved in SAVE_UP and restored before ret
    92  define(`vp',  `%ebx')	C second source pointer, saved in SAVE_VP and restored before ret
    93  define(`cy',  `%ecx')	C carry-in, live only until it is shifted into CF
    94  define(`r1',  `%ecx')	C limb scratch, shares ecx with cy
    95  define(`r2',  `%edx')	C second limb scratch
    96  
    97  ASM_START()
    98  	TEXT
    99  	ALIGN(16)
   100  deflit(`FRAME',0)
   101  C Body shared by both entry points: eax counts limb pairs and CF carries between limbs.
   102  PROLOGUE(M4_function_n)
   103  	xor	cy, cy			C no carry-in for the plain _n entry
   104  L(start):				C the _nc entry jumps here with cy = carry-in
   105  	mov	PARAM_SIZE, %eax	C size
   106  	mov	rp, SAVE_RP		C park edi in the size slot, size is already in eax
   107  	mov	PARAM_DST, rp
   108  	mov	up, SAVE_UP		C park esi in the dst slot, dst is already in rp
   109  	mov	PARAM_SRC1, up
   110  	shr	%eax			C size >> 1, CF = low bit of size, ZF set if the result is 0
   111  	mov	vp, SAVE_VP		C park ebx in the src1 slot, src1 is already in up
   112  	mov	PARAM_SRC2, vp		C the mov instructions keep the shr flags for the jumps below
   113  	jz	L(one)			C size == 1 (a size of 0 would also land here and process one limb)
   114  	jc	L(three)		C size % 2 == 1
   115  C Even size: preload the first limb in r2 and bias the pointers to join the loop at its midpoint.
   116  	shr	cy			C CF = bit 0 of the carry-in
   117  	mov	(up), r2		C r2 = first source limb
   118  	lea	4(up), up		C lea moves the pointers without touching CF
   119  	lea	4(vp), vp
   120  	lea	-4(rp), rp
   121  	jmp	L(entry)
   122  L(one):					C single limb, eax is already 0
   123  	shr	cy			C CF = bit 0 of the carry-in
   124  	mov	(up), r1		C r1 = the only source limb, overwrites cy in ecx
   125  	jmp	L(end)
   126  L(three):				C odd size of at least 3
   127  	shr	cy			C CF = bit 0 of the carry-in
   128  	mov	(up), r1		C r1 = first source limb, overwrites cy in ecx
   129  C Main loop: two limbs per pass, one extra limb is always left for the tail at the end label.
   130  	ALIGN(16)
   131  L(oop):
   132  	M4_inst	(vp), r1		C first limb of the pair, consumes and produces CF
   133  	lea	8(up), up
   134  	mov	-4(up), r2		C load the second limb of the pair
   135  	lea	8(vp), vp
   136  	mov	r1, (rp)		C store the first result limb
   137  L(entry):				C even sizes join here with r2 preloaded
   138  	M4_inst	-4(vp), r2		C second limb of the pair, consumes and produces CF
   139  	lea	8(rp), rp
   140  	dec	%eax			C dec leaves CF alone, only ZF feeds the jnz
   141  	mov	(up), r1		C preload the limb for the next pass or for the tail
   142  	mov	r2, -4(rp)		C store the second result limb
   143  	jnz	L(oop)
   144  C Tail: finish the limb already loaded in r1, restore the saved registers, return CF.
   145  L(end):					C %eax is zero here
   146  	mov	SAVE_UP, up		C restore esi
   147  	M4_inst	(vp), r1		C last limb
   148  	mov	SAVE_VP, vp		C restore ebx
   149  	mov	r1, (rp)		C store the last result limb
   150  	adc	%eax, %eax		C eax = 0 + 0 + CF, the carry or borrow out
   151  	mov	SAVE_RP, rp		C restore edi
   152  	ret
   153  EPILOGUE()
   154  
   155  PROLOGUE(M4_function_nc)	C entry point taking an explicit carry-in
   156  	mov	PARAM_CARRY, cy		C carry-in, documented as 0 or 1; only bit 0 reaches CF
   157  	jmp	L(start)		C continue in the body shared with the _n entry
   158  EPILOGUE()
   159  ASM_END()