github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/pa32/hppa1_1/pa7100/addmul_1.asm (about)

     1  dnl  HP-PA 7100/7200 mpn_addmul_1 -- Multiply a limb vector with a limb and
     2  dnl  add the result to a second limb vector.
     3  
     4  dnl  Copyright 1995, 2000-2003 Free Software Foundation, Inc.
     5  
     6  dnl  This file is part of the GNU MP Library.
     7  dnl
     8  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     9  dnl  it under the terms of either:
    10  dnl
    11  dnl    * the GNU Lesser General Public License as published by the Free
    12  dnl      Software Foundation; either version 3 of the License, or (at your
    13  dnl      option) any later version.
    14  dnl
    15  dnl  or
    16  dnl
    17  dnl    * the GNU General Public License as published by the Free Software
    18  dnl      Foundation; either version 2 of the License, or (at your option) any
    19  dnl      later version.
    20  dnl
    21  dnl  or both in parallel, as here.
    22  dnl
    23  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    24  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    25  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    26  dnl  for more details.
    27  dnl
    28  dnl  You should have received copies of the GNU General Public License and the
    29  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    30  dnl  see https://www.gnu.org/licenses/.
    31  
    32  include(`../config.m4')
    33  
    34  C INPUT PARAMETERS
    35  define(`res_ptr',`%r26')			C result vector: limbs are loaded, added to and stored back
    36  define(`s1_ptr',`%r25')				C source limb vector: read only, pointer post-incremented
    37  define(`size_param',`%r24')			C limb count, used as a down-counter biased by -4
    38  define(`s2_limb',`%r23')			C multiplier limb, copied to %fr31R on entry
    39  
    40  define(`cylimb',`%r28')				C running carry limb between products
    41  define(`s0',`%r19')				C s0-s3: limbs loaded from res_ptr
    42  define(`s1',`%r20')
    43  define(`s2',`%r3')				C %r3-%r7 are saved and restored by the routine
    44  define(`s3',`%r4')
    45  define(`lo0',`%r21')				C lo0-lo3: low words of the four staged products
    46  define(`lo1',`%r5')
    47  define(`lo2',`%r6')
    48  define(`lo3',`%r7')
    49  define(`hi0',`%r22')				C hi0-hi3: high words of the four staged products
    50  define(`hi1',`%r23')				C safe to reuse: same register as s2_limb, which is already in %fr31R
    51  define(`hi2',`%r29')
    52  define(`hi3',`%r1')
    53  
    54  ASM_START()
        C mpn_addmul_1: for each of size_param limbs, multiply the limb at s1_ptr
        C by s2_limb and add the 64-bit product into the limb at res_ptr, carrying
        C the excess forward in cylimb.  The final carry limb is left in cylimb
        C (%r28) at return -- presumably the function result; confirm against the
        C PA-RISC calling convention.
        C
        C Layout: a one-limb-at-a-time path (few_limbs, loop2) handles counts
        C below 4 and the 0-3 leftover limbs; a four-limbs-per-pass software
        C pipeline (0, loop, end) handles the bulk.  Products are computed by
        C xmpyu in FP registers and moved to integer registers through a scratch
        C area in the stack frame.  The carry bit is kept live across loads, stores
        C and addib, so instruction order here is significant.
    55  PROLOGUE(mpn_addmul_1)
    56  C	.callinfo	frame=128,no_calls
    57  
    58  	ldo	128(%r30),%r30		C advance %r30 by 128 to make the frame
    59  	stws	s2_limb,-16(%r30)	C spill the multiplier so it can be loaded into an FP register
    60  	add	 %r0,%r0,cylimb			C clear cy and cylimb
    61  	addib,<	-4,size_param,L(few_limbs)	C size_param -= 4; negative means fewer than 4 limbs
    62  	fldws	-16(%r30),%fr31R	C delay slot, executed on both paths: fr31R = multiplier
    63  
    64  	ldo	-112(%r30),%r31		C %r31 = scratch base; products live at -16..+15 from it
    65  	stw	%r3,-96(%r30)		C save %r3-%r7, which back s2 s3 lo1 lo2 lo3
    66  	stw	%r4,-92(%r30)
    67  	stw	%r5,-88(%r30)
    68  	stw	%r6,-84(%r30)
    69  	stw	%r7,-80(%r30)
    70  
    71  	bb,>=,n	 s1_ptr,29,L(0)		C bit 29 of s1_ptr (the 4s bit, MSB is bit 0) clear: skip the peel; n nullifies the next insn
    72  C Peel one limb so that s1_ptr is 8-byte aligned for the fldds loads below
    73  	fldws,ma 4(s1_ptr),%fr4		C fetch one source limb, s1_ptr += 4
    74  	ldws	 0(res_ptr),s0		C fetch the matching result limb
    75  	xmpyu	 %fr4,%fr31R,%fr5	C fr5 = 64-bit product of limb and multiplier
    76  	fstds	 %fr5,-16(%r31)		C move the product to integer side via scratch memory
    77  	ldws	-16(%r31),cylimb	C high word becomes the carry limb (cylimb was zero)
    78  	ldws	-12(%r31),lo0		C low word
    79  	add	 s0,lo0,s0		C result limb += low word; carry out feeds the next addc
    80  	addib,< -1,size_param,L(few_limbs)	C one limb done; negative means under 4 remain (%r3-%r7 not yet modified, so skipping the restore is harmless)
    81  	stws,ma	 s0,4(res_ptr)		C delay slot: store result limb, res_ptr += 4
    82  
    83  C start software pipeline ----------------------------------------------------
    84  LDEF(0)
    85  	fldds,ma 8(s1_ptr),%fr4		C load source limbs 0 and 1, s1_ptr += 8
    86  	fldds,ma 8(s1_ptr),%fr8		C load source limbs 2 and 3, s1_ptr += 8
    87  
    88  	xmpyu	 %fr4L,%fr31R,%fr5	C product 0
    89  	xmpyu	 %fr4R,%fr31R,%fr6	C product 1
    90  	xmpyu	 %fr8L,%fr31R,%fr9	C product 2
    91  	xmpyu	 %fr8R,%fr31R,%fr10	C product 3
    92  
    93  	fstds	 %fr5,-16(%r31)		C stage the four products in scratch memory
    94  	fstds	 %fr6,-8(%r31)
    95  	fstds	 %fr9,0(%r31)
    96  	fstds	 %fr10,8(%r31)
    97  C Reload each product as separate high and low integer words
    98  	ldws   -16(%r31),hi0
    99  	ldws   -12(%r31),lo0
   100  	ldws	-8(%r31),hi1		C overwrites %r23, the former s2_limb
   101  	ldws	-4(%r31),lo1
   102  	ldws	 0(%r31),hi2
   103  	ldws	 4(%r31),lo2
   104  	ldws	 8(%r31),hi3
   105  	ldws	12(%r31),lo3
   106  
   107  	addc	 lo0,cylimb,lo0		C lo0 += incoming carry limb + carry bit
   108  	addc	 lo1,hi0,lo1		C each low word absorbs the previous product high word
   109  	addc	 lo2,hi1,lo2
   110  	addc	 lo3,hi2,lo3
   111  
   112  	addib,<	 -4,size_param,L(end)	C size_param -= 4; no further group of 4: just drain
   113  	addc	 %r0,hi3,cylimb			C propagate carry into cylimb
   114  C main loop ------------------------------------------------------------------
   115  LDEF(loop)
   116  	fldds,ma 8(s1_ptr),%fr4		C next four source limbs
   117  	fldds,ma 8(s1_ptr),%fr8
   118  C Result limbs for the previous group are loaded while the new group is multiplied
   119  	ldws	 0(res_ptr),s0
   120  	xmpyu	 %fr4L,%fr31R,%fr5
   121  	ldws	 4(res_ptr),s1
   122  	xmpyu	 %fr4R,%fr31R,%fr6
   123  	ldws	 8(res_ptr),s2
   124  	xmpyu	 %fr8L,%fr31R,%fr9
   125  	ldws	12(res_ptr),s3
   126  	xmpyu	 %fr8R,%fr31R,%fr10
   127  C Stage the new products while adding the previous lo0-lo3 into the result limbs
   128  	fstds	 %fr5,-16(%r31)
   129  	add	 s0,lo0,s0		C starts a new carry chain; the old carry is already in cylimb
   130  	fstds	 %fr6,-8(%r31)
   131  	addc	 s1,lo1,s1
   132  	fstds	 %fr9,0(%r31)
   133  	addc	 s2,lo2,s2
   134  	fstds	 %fr10,8(%r31)
   135  	addc	 s3,lo3,s3		C carry out stays live until the addc on lo0 below
   136  C lo0-lo3 have been consumed above, so they can now be overwritten
   137  	ldws   -16(%r31),hi0
   138  	ldws   -12(%r31),lo0
   139  	ldws	-8(%r31),hi1
   140  	ldws	-4(%r31),lo1
   141  	ldws	 0(%r31),hi2
   142  	ldws	 4(%r31),lo2
   143  	ldws	 8(%r31),hi3
   144  	ldws	12(%r31),lo3
   145  C Chain the new products together, interleaved with storing the finished limbs
   146  	addc	 lo0,cylimb,lo0		C lo0 += carry limb + carry bit from the s3 addition
   147  	stws,ma	 s0,4(res_ptr)		C store and step res_ptr by 4
   148  	addc	 lo1,hi0,lo1
   149  	stws,ma	 s1,4(res_ptr)
   150  	addc	 lo2,hi1,lo2
   151  	stws,ma	 s2,4(res_ptr)
   152  	addc	 lo3,hi2,lo3
   153  	stws,ma	 s3,4(res_ptr)
   154  
   155  	addib,>= -4,size_param,L(loop)	C size_param -= 4; loop while another group of 4 is available
   156  	addc	 %r0,hi3,cylimb			C propagate carry into cylimb
   157  C finish software pipeline ---------------------------------------------------
   158  LDEF(end)
   159  	ldws	 0(res_ptr),s0		C result limbs for the last staged group
   160  	ldws	 4(res_ptr),s1
   161  	ldws	 8(res_ptr),s2
   162  	ldws	12(res_ptr),s3
   163  
   164  	add	 s0,lo0,s0		C add the pending low words and store, res_ptr += 4 each
   165  	stws,ma	 s0,4(res_ptr)
   166  	addc	 s1,lo1,s1
   167  	stws,ma	 s1,4(res_ptr)
   168  	addc	 s2,lo2,s2
   169  	stws,ma	 s2,4(res_ptr)
   170  	addc	 s3,lo3,s3		C carry out is consumed by loop2 or by ret
   171  	stws,ma	 s3,4(res_ptr)
   172  
   173  C restore callee-saves registers ---------------------------------------------
   174  	ldw	-96(%r30),%r3
   175  	ldw	-92(%r30),%r4
   176  	ldw	-88(%r30),%r5
   177  	ldw	-84(%r30),%r6
   178  	ldw	-80(%r30),%r7
   179  
   180  LDEF(few_limbs)
   181  	addib,=,n 4,size_param,L(ret)	C undo the -4 bias; zero limbs left: return, n nullifies the next insn when taken
   182  
   183  LDEF(loop2)
   184  	fldws,ma 4(s1_ptr),%fr4		C fetch one source limb, s1_ptr += 4
   185  	ldws	 0(res_ptr),s0		C fetch the matching result limb
   186  	xmpyu	 %fr4,%fr31R,%fr5	C 64-bit product
   187  	fstds	 %fr5,-16(%r30)		C scratch slot addressed from %r30 here since %r31 may not be set up
   188  	ldws	-16(%r30),hi0		C high word of product
   189  	ldws	-12(%r30),lo0		C low word of product
   190  	addc	 lo0,cylimb,lo0		C low word += carry limb + carry bit
   191  	addc	 %r0,hi0,cylimb		C new carry limb = high word + carry bit
   192  	add	 s0,lo0,s0		C result limb += low word; carry bit feeds the next pass or ret
   193  	stws,ma	 s0,4(res_ptr)		C store result limb, res_ptr += 4
   194  	addib,<> -1,size_param,L(loop2)	C size_param -= 1; repeat until it reaches zero
   195  	nop				C branch delay slot
   196  
   197  LDEF(ret)
   198  	addc	 %r0,cylimb,cylimb	C fold the last carry bit into the carry limb
   199  	bv	 0(%r2)			C return through the address in %r2
   200  	ldo	 -128(%r30),%r30	C delay slot: release the 128-byte frame
   201  EPILOGUE(mpn_addmul_1)