github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/pa32/hppa1_1/pa7100/submul_1.asm (about)

     1  dnl  HP-PA 7100/7200 mpn_submul_1 -- Multiply a limb vector with a limb and
     2  dnl  subtract the result from a second limb vector.
     3  
     4  dnl  Copyright 1995, 2000-2003 Free Software Foundation, Inc.
     5  
     6  dnl  This file is part of the GNU MP Library.
     7  dnl
     8  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     9  dnl  it under the terms of either:
    10  dnl
    11  dnl    * the GNU Lesser General Public License as published by the Free
    12  dnl      Software Foundation; either version 3 of the License, or (at your
    13  dnl      option) any later version.
    14  dnl
    15  dnl  or
    16  dnl
    17  dnl    * the GNU General Public License as published by the Free Software
    18  dnl      Foundation; either version 2 of the License, or (at your option) any
    19  dnl      later version.
    20  dnl
    21  dnl  or both in parallel, as here.
    22  dnl
    23  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    24  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    25  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    26  dnl  for more details.
    27  dnl
    28  dnl  You should have received copies of the GNU General Public License and the
    29  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    30  dnl  see https://www.gnu.org/licenses/.
    31  
    32  include(`../config.m4')
    33  
    34  C INPUT PARAMETERS
    35  define(`res_ptr',`%r26')			C limb vector that is loaded, reduced and stored back
    36  define(`s1_ptr',`%r25')				C limb vector that is multiplied by s2_limb
    37  define(`size_param',`%r24')			C limb count; kept biased by -4 inside the unrolled code
    38  define(`s2_limb',`%r23')			C single-limb multiplier
    39  C WORKING REGISTERS
    40  define(`cylimb',`%r28')				C running carry limb; holds the final borrow limb at return
    41  define(`s0',`%r19')				C s0-s3: limbs loaded from and stored back to res_ptr
    42  define(`s1',`%r20')
    43  define(`s2',`%r3')				C %r3-%r7 are saved to the stack before the unrolled code uses them
    44  define(`s3',`%r4')
    45  define(`lo0',`%r21')				C lo0-lo3: low product words, with the incoming carry added in
    46  define(`lo1',`%r5')
    47  define(`lo2',`%r6')
    48  define(`lo3',`%r7')
    49  define(`hi0',`%r22')				C hi0-hi3: high product words
    50  define(`hi1',`%r23')				C safe to reuse: s2_limb is copied to %fr31R at entry and not read from %r23 again
    51  define(`hi2',`%r29')
    52  define(`hi3',`%r1')
    53  
    54  ASM_START()
    55  PROLOGUE(mpn_submul_1)
    56  C	.callinfo	frame=128,no_calls
    57  C {res_ptr,size_param} -= {s1_ptr,size_param} * s2_limb; the borrow-out limb is left in cylimb
    58  	ldo	128(%r30),%r30			C allocate 128 bytes of stack
    59  	stws	s2_limb,-16(%r30)		C spill the multiplier so it can be reloaded into an FP register
    60  	add	 %r0,%r0,cylimb			C clear cy and cylimb
    61  	addib,<	-4,size_param,L(few_limbs)	C size_param -= 4; fewer than 4 limbs skip the unrolled code
    62  	fldws	-16(%r30),%fr31R		C (delay slot, runs on both paths) multiplier now in %fr31R
    63  C Unrolled path: set up the product scratch area and save the registers behind s2, s3, lo1-lo3
    64  	ldo	-112(%r30),%r31			C %r31 = %r30-112; products are spilled at -16..+15 from here
    65  	stw	%r3,-96(%r30)			C save %r3-%r7 above the scratch area
    66  	stw	%r4,-92(%r30)
    67  	stw	%r5,-88(%r30)
    68  	stw	%r6,-84(%r30)
    69  	stw	%r7,-80(%r30)
    70  
    71  	bb,>=,n	 s1_ptr,29,L(0)			C bit 29 (the 4s bit) of s1_ptr clear: skip the single-limb step below
    72  C Single leading limb, presumably so that the 8-byte fldds loads below start on an 8-byte boundary
    73  	fldws,ma 4(s1_ptr),%fr4			C load one s1 limb, s1_ptr += 4
    74  	ldws	 0(res_ptr),s0
    75  	xmpyu	 %fr4,%fr31R,%fr5		C 64-bit product of the limb and the multiplier
    76  	fstds	 %fr5,-16(%r31)			C move the product to integer registers through the stack
    77  	ldws	-16(%r31),cylimb		C high product word becomes the running carry limb
    78  	ldws	-12(%r31),lo0			C low product word
    79  	sub	 s0,lo0,s0
    80  	add	 s0,lo0,%r0			C invert cy
    81  	addib,< -1,size_param,L(few_limbs)	C fewer than 4 limbs left now: finish them in loop2
    82  	stws,ma	 s0,4(res_ptr)			C (delay slot, runs on both paths) store result limb, res_ptr += 4
    83  
    84  C start software pipeline ----------------------------------------------------
    85  LDEF(0)
    86  	fldds,ma 8(s1_ptr),%fr4			C load four s1 limbs as two doublewords, s1_ptr += 16
    87  	fldds,ma 8(s1_ptr),%fr8
    88  
    89  	xmpyu	 %fr4L,%fr31R,%fr5		C four 64-bit products, one per limb
    90  	xmpyu	 %fr4R,%fr31R,%fr6
    91  	xmpyu	 %fr8L,%fr31R,%fr9
    92  	xmpyu	 %fr8R,%fr31R,%fr10
    93  
    94  	fstds	 %fr5,-16(%r31)			C spill the products to the scratch area
    95  	fstds	 %fr6,-8(%r31)
    96  	fstds	 %fr9,0(%r31)
    97  	fstds	 %fr10,8(%r31)
    98  
    99  	ldws   -16(%r31),hi0			C reload each product as a hi/lo word pair
   100  	ldws   -12(%r31),lo0
   101  	ldws	-8(%r31),hi1
   102  	ldws	-4(%r31),lo1
   103  	ldws	 0(%r31),hi2
   104  	ldws	 4(%r31),lo2
   105  	ldws	 8(%r31),hi3
   106  	ldws	12(%r31),lo3
   107  
   108  	addc	 lo0,cylimb,lo0			C lo0 += carry limb + cy left by the previous subtract step
   109  	addc	 lo1,hi0,lo1			C each low word absorbs the high word of the limb below it
   110  	addc	 lo2,hi1,lo2
   111  	addc	 lo3,hi2,lo3
   112  
   113  	addib,<	 -4,size_param,L(end)		C no further full group of 4: drain the pipeline at end
   114  	addc	 %r0,hi3,cylimb			C propagate carry into cylimb
   115  C main loop ------------------------------------------------------------------
   116  LDEF(loop)
   117  	fldds,ma 8(s1_ptr),%fr4			C load the next four s1 limbs
   118  	fldds,ma 8(s1_ptr),%fr8
   119  C res limbs for the pending lo0-lo3 are loaded while the next four products are started
   120  	ldws	 0(res_ptr),s0
   121  	xmpyu	 %fr4L,%fr31R,%fr5
   122  	ldws	 4(res_ptr),s1
   123  	xmpyu	 %fr4R,%fr31R,%fr6
   124  	ldws	 8(res_ptr),s2
   125  	xmpyu	 %fr8L,%fr31R,%fr9
   126  	ldws	12(res_ptr),s3
   127  	xmpyu	 %fr8R,%fr31R,%fr10
   128  C subtract the pending lo0-lo3 from the res limbs while the new products are spilled
   129  	fstds	 %fr5,-16(%r31)
   130  	sub	 s0,lo0,s0
   131  	fstds	 %fr6,-8(%r31)
   132  	subb	 s1,lo1,s1
   133  	fstds	 %fr9,0(%r31)
   134  	subb	 s2,lo2,s2
   135  	fstds	 %fr10,8(%r31)
   136  	subb	 s3,lo3,s3
   137  	subb	 %r0,%r0,lo0			C these two insns ...
   138  	add	 lo0,lo0,%r0			C ... just invert cy
   139  C lo0-lo3 are consumed; reload them and hi0-hi3 from the freshly spilled products
   140  	ldws   -16(%r31),hi0
   141  	ldws   -12(%r31),lo0
   142  	ldws	-8(%r31),hi1
   143  	ldws	-4(%r31),lo1
   144  	ldws	 0(%r31),hi2
   145  	ldws	 4(%r31),lo2
   146  	ldws	 8(%r31),hi3
   147  	ldws	12(%r31),lo3
   148  C form the next pending lo0-lo3 while the four finished result limbs are stored, res_ptr += 16
   149  	addc	 lo0,cylimb,lo0
   150  	stws,ma	 s0,4(res_ptr)
   151  	addc	 lo1,hi0,lo1
   152  	stws,ma	 s1,4(res_ptr)
   153  	addc	 lo2,hi1,lo2
   154  	stws,ma	 s2,4(res_ptr)
   155  	addc	 lo3,hi2,lo3
   156  	stws,ma	 s3,4(res_ptr)
   157  
   158  	addib,>= -4,size_param,L(loop)		C another full group of 4 limbs remains: iterate
   159  	addc	 %r0,hi3,cylimb			C propagate carry into cylimb
   160  C finish software pipeline ---------------------------------------------------
   161  LDEF(end)
   162  	ldws	 0(res_ptr),s0			C res limbs for the last pending lo0-lo3
   163  	ldws	 4(res_ptr),s1
   164  	ldws	 8(res_ptr),s2
   165  	ldws	12(res_ptr),s3
   166  
   167  	sub	 s0,lo0,s0			C subtract and store the final group, res_ptr += 16
   168  	stws,ma	 s0,4(res_ptr)
   169  	subb	 s1,lo1,s1
   170  	stws,ma	 s1,4(res_ptr)
   171  	subb	 s2,lo2,s2
   172  	stws,ma	 s2,4(res_ptr)
   173  	subb	 s3,lo3,s3
   174  	stws,ma	 s3,4(res_ptr)
   175  	subb	 %r0,%r0,lo0			C these two insns ...
   176  	add	 lo0,lo0,%r0			C ... invert cy
   177  
   178  C restore callee-saves registers ---------------------------------------------
   179  	ldw	-96(%r30),%r3
   180  	ldw	-92(%r30),%r4
   181  	ldw	-88(%r30),%r5
   182  	ldw	-84(%r30),%r6
   183  	ldw	-80(%r30),%r7
   184  C Branches to few_limbs bypass the restores above; on those paths %r3-%r7 have not been written
   185  LDEF(few_limbs)
   186  	addib,=,n 4,size_param,L(ret)		C undo the -4 bias; nothing left means go straight to ret
   187  C One limb per iteration for the remaining limbs; cylimb and cy carry over from the code above
   188  LDEF(loop2)
   189  	fldws,ma 4(s1_ptr),%fr4			C load one s1 limb, s1_ptr += 4
   190  	ldws	 0(res_ptr),s0
   191  	xmpyu	 %fr4,%fr31R,%fr5
   192  	fstds	 %fr5,-16(%r30)			C scratch slot overwrites the spilled s2_limb, which now lives in %fr31R
   193  	ldws	-16(%r30),hi0
   194  	ldws	-12(%r30),lo0
   195  	addc	 lo0,cylimb,lo0			C lo0 += carry limb + cy
   196  	addc	 %r0,hi0,cylimb			C new carry limb = high word + carry out of lo0
   197  	sub	 s0,lo0,s0
   198  	add	 s0,lo0,%r0			C invert cy
   199  	stws,ma	 s0,4(res_ptr)			C store result limb, res_ptr += 4
   200  	addib,<> -1,size_param,L(loop2)		C loop until size_param reaches zero
   201  	nop					C (delay slot) nothing useful to schedule here
   202  
   203  LDEF(ret)
   204  	addc	 %r0,cylimb,cylimb		C fold the last cy into the carry limb
   205  	bv	 0(%r2)				C return through %r2
   206  	ldo	 -128(%r30),%r30		C (delay slot) release the 128 bytes of stack
   207  EPILOGUE(mpn_submul_1)