github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/pa64/aors_n.asm (about)

     1  dnl  HP-PA 2.0 mpn_add_n, mpn_sub_n
     2  
     3  dnl  Copyright 1997, 2000, 2002, 2003, 2009, 2010 Free Software Foundation,
     4  dnl  Inc.
     5  
     6  dnl  This file is part of the GNU MP Library.
     7  dnl
     8  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     9  dnl  it under the terms of either:
    10  dnl
    11  dnl    * the GNU Lesser General Public License as published by the Free
    12  dnl      Software Foundation; either version 3 of the License, or (at your
    13  dnl      option) any later version.
    14  dnl
    15  dnl  or
    16  dnl
    17  dnl    * the GNU General Public License as published by the Free Software
    18  dnl      Foundation; either version 2 of the License, or (at your option) any
    19  dnl      later version.
    20  dnl
    21  dnl  or both in parallel, as here.
    22  dnl
    23  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    24  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    25  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    26  dnl  for more details.
    27  dnl
    28  dnl  You should have received copies of the GNU General Public License and the
    29  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    30  dnl  see https://www.gnu.org/licenses/.
    31  
    32  
    33  dnl  This runs at 2 cycles/limb on PA8000 and 1.6875 cycles/limb on PA8500.  It
    34  dnl  should be possible to reach the cache bandwidth 1.5 cycles/limb at least
    35  dnl  with PA8500.  The problem now is stalling of the first ADD,DC after LDO,
    36  dnl  where the processor gets confused about where carry comes from.
    37  
    38  include(`../config.m4')
    39  
    40  dnl INPUT PARAMETERS
    41  define(`rp',`%r26')	C result pointer: every std in the loop stores through it
    42  define(`up',`%r25')	C first source pointer: left operand of each ADCSBC
    43  define(`vp',`%r24')	C second source pointer: right operand of each ADCSBC
    44  define(`n',`%r23')	C limb count: decremented by 8 per loop pass
    45  
    46  ifdef(`OPERATION_add_n', `
    47  	define(ADCSBC,	      `add,dc')	C per-limb operation: add with doubleword carry
    48  	define(INITCY,	      `addi -1,%r22,%r0')	C seed carry from %r22: -1 + %r22 carries iff %r22 != 0, sum discarded in %r0
    49  	define(func,	      mpn_add_n)	C name given to the plain entry point
    50  	define(func_nc,	      mpn_add_nc)')	C name given to the entry point that takes a carry-in
    51  ifdef(`OPERATION_sub_n', `
    52  	define(ADCSBC,	      `sub,db')	C per-limb operation: subtract with doubleword borrow
    53  	define(INITCY,	      `subi 0,%r22,%r0')	C seed borrow from %r22: 0 - %r22 borrows iff %r22 != 0, difference discarded in %r0
    54  	define(func,	      mpn_sub_n)	C name given to the plain entry point
    55  	define(func_nc,	      mpn_sub_nc)')	C name given to the entry point that takes a borrow-in
    56  
    57  MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
    58  C NOTE(review): the macro invoked above is not defined in this file; check its effect in the included config.m4
    59  ifdef(`HAVE_ABI_2_0w',
    60  `       .level  2.0w
    61  ',`     .level  2.0
    62  ')	C assembler level tracks the ABI: 2.0w when HAVE_ABI_2_0w is defined, otherwise 2.0
    63  PROLOGUE(func_nc)
    64  ifdef(`HAVE_ABI_2_0w',
    65  `	b		L(com)		C carry-in entry: join the shared code at com, bypassing the ldi that zeroes %r22
    66  	nop				C delay slot, 2.0w: nothing to do, %r22 is consumed exactly as the caller left it
    67  ',`	b		L(com)		C same jump when HAVE_ABI_2_0w is not defined
    68  	ldw		-52(%r30), %r22	C delay slot: load the carry-in word from -52(%r30) (presumably the stack slot of the fifth argument, TODO confirm against the ABI)
    69  ')
    70  EPILOGUE()
    71  PROLOGUE(func)
    72  	ldi		0, %r22		C plain entry point: no carry-in, so start from %r22 = 0
    73  LDEF(com)
    74  	sub		%r0, n, %r21		C r21 = -n
    75  	depw,z		%r21, 30, 3, %r28	C r28 = 2 * (-n & 7): blr index, one unit is 2 insns and a limb block is 4
    76  	depw,z		%r21, 28, 3, %r21	C r21 = 8 * (-n & 7): byte size of the limb blocks skipped on entry
    77  	sub		up, %r21, up		C bias up back by the skipped bytes so the entry block reads limb 0
    78  	sub		vp, %r21, vp		C same bias for vp
    79  	sub		rp, %r21, rp		C same bias for rp
    80  	blr		%r28, %r0		C computed jump into the unrolled loop, skipping (-n & 7) limb blocks
    81  	INITCY				C delay slot: seed carry/borrow from %r22 before the first ADCSBC
    82  
    83  LDEF(loop)
    84  	ldd		0(up), %r20		C limb block 0: load u limb, load v limb, ADCSBC, store; exactly 4 insns
    85  	ldd		0(vp), %r31
    86  	ADCSBC		%r20, %r31, %r20	C result overwrites the u limb register; carry/borrow chains into the next block
    87  	std		%r20, 0(rp)
    88  LDEF(7)	ldd		8(up), %r21		C blocks alternate the register pairs %r20/%r31 and %r21/%r19
    89  	ldd		8(vp), %r19
    90  	ADCSBC		%r21, %r19, %r21
    91  	std		%r21, 8(rp)
    92  LDEF(6)	ldd		16(up), %r20
    93  	ldd		16(vp), %r31
    94  	ADCSBC		%r20, %r31, %r20
    95  	std		%r20, 16(rp)
    96  LDEF(5)	ldd		24(up), %r21
    97  	ldd		24(vp), %r19
    98  	ADCSBC		%r21, %r19, %r21
    99  	std		%r21, 24(rp)
   100  LDEF(4)	ldd		32(up), %r20
   101  	ldd		32(vp), %r31
   102  	ADCSBC		%r20, %r31, %r20
   103  	std		%r20, 32(rp)
   104  LDEF(3)	ldd		40(up), %r21
   105  	ldd		40(vp), %r19
   106  	ADCSBC		%r21, %r19, %r21
   107  	std		%r21, 40(rp)
   108  LDEF(2)	ldd		48(up), %r20
   109  	ldd		48(vp), %r31
   110  	ADCSBC		%r20, %r31, %r20
   111  	std		%r20, 48(rp)
   112  LDEF(1)	ldd		56(up), %r21		C last limb block of the pass; pointer updates are interleaved with its store
   113  	ldd		56(vp), %r19
   114  	ADCSBC		%r21, %r19, %r21
   115  	ldo		64(up), up		C advance up by 8 limbs (64 bytes)
   116  	std		%r21, 56(rp)
   117  	ldo		64(vp), vp		C advance vp by 8 limbs
   118  	addib,>		-8, n, L(loop)		C n -= 8; run another full pass while n > 0
   119  	ldo		64(rp), rp		C delay slot: advance rp by 8 limbs
   120  
   121  	add,dc		%r0, %r0, %r29		C r29 = 0 + 0 + carry bit left by the last ADCSBC
   122  ifdef(`OPERATION_sub_n',`
   123  	subi		1, %r29, %r29		C subtract case: r29 = 1 - r29, turning the carry bit into the borrow out
   124  ')
   125  	bve		(%r2)			C return through %r2
   126  ifdef(`HAVE_ABI_2_0w',
   127  `	copy		%r29, %r28		C delay slot, 2.0w: hand the result back in %r28
   128  ',`	ldi		0, %r28		C delay slot, other ABI: clear %r28, result stays in %r29 (presumably returned as the %r28:%r29 pair, TODO confirm)
   129  ')
   130  EPILOGUE()