github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/power/submul_1.asm (about)

     1  dnl  IBM POWER mpn_submul_1 -- Multiply a limb vector with a limb and subtract
     2  dnl  the result from a second limb vector.
     3  
     4  dnl  Copyright 1992, 1994, 1999-2001 Free Software Foundation, Inc.
     5  
     6  dnl  This file is part of the GNU MP Library.
     7  dnl
     8  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     9  dnl  it under the terms of either:
    10  dnl
    11  dnl    * the GNU Lesser General Public License as published by the Free
    12  dnl      Software Foundation; either version 3 of the License, or (at your
    13  dnl      option) any later version.
    14  dnl
    15  dnl  or
    16  dnl
    17  dnl    * the GNU General Public License as published by the Free Software
    18  dnl      Foundation; either version 2 of the License, or (at your option) any
    19  dnl      later version.
    20  dnl
    21  dnl  or both in parallel, as here.
    22  dnl
    23  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    24  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    25  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    26  dnl  for more details.
    27  dnl
    28  dnl  You should have received copies of the GNU General Public License and the
    29  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    30  dnl  see https://www.gnu.org/licenses/.
    31  
    32  
    33  dnl  INPUT PARAMETERS
    34  dnl  res_ptr	r3
    35  dnl  s1_ptr	r4
    36  dnl  size	r5
    37  dnl  s2_limb	r6
    38  
    39  dnl  The POWER architecture has no unsigned 32x32->64 bit multiplication
    40  dnl  instruction.  To obtain that operation, we have to use the 32x32->64
    41  dnl  signed multiplication instruction, and add the appropriate compensation to
    42  dnl  the high limb of the result.  We add the multiplicand if the multiplier
    43  dnl  has its most significant bit set, and we add the multiplier if the
    44  dnl  multiplicand has its most significant bit set.  We need to preserve the
    45  dnl  carry flag between each iteration, so we have to compute the compensation
    46  dnl  carefully (the natural srai+and sequence doesn't work).  Since all POWER
    47  dnl  CPUs can branch in zero cycles, we use conditional branches to compensate.
    48  
    49  include(`../config.m4')
    50  
    51  ASM_START()
    52  PROLOGUE(mpn_submul_1)
        C  Multiplies the size-limb vector at s1_ptr by s2_limb, subtracts the
        C  product from the size-limb vector at res_ptr in place, and leaves
        C  the final carry limb plus borrow in r3.
        C
        C  Register use:
        C    r3       res_ptr, biased by -4 so that 4(3) is the current limb
        C    r4       s1_ptr, advanced by lu
        C    r6       s2_limb
        C    r0       current s1 limb (Lploop reuses it for the low product)
        C    r9, r10  high product limb = carry limb, alternating between
        C             the two unrolled halves of each loop
        C    r7       scratch: sign mask, low product in Lnloop, res limb
        C    r8       result limb waiting to be stored
        C    r11      low product + carry limb; junk after the a instruction
        C    ctr      limbs remaining
        C    cr0      sign of s2_limb on entry, then sign of each s1 limb
        C    CA       borrow carried from one limb to the next
        C
        C  Lploop handles s2_limb >= 0 as a signed value, Lnloop handles
        C  s2_limb with its top bit set.
        C  NOTE(review): size is assumed to be >= 1; the first limb is loaded
        C  and processed before ctr is tested.
    53  	cal	3,-4(3)		C bias res_ptr so 4(3) is the current res limb
    54  	l	0,0(4)		C r0 = first s1 limb
    55  	cmpi	0,6,0		C cr0 = sign of s2_limb, tested by blt below
    56  	mtctr	5		C ctr = size
    57  	mul	9,0,6		C r9 = high half of signed product, MQ = low half
    58  	srai	7,0,31		C r7 = all ones if s1 limb has its top bit set
    59  	and	7,7,6		C r7 = s2_limb if s1 limb is negative, else 0
    60  	mfmq	11		C r11 = low half of product
    61  	cax	9,9,7		C adjust high limb for negative limb from s1
    62  	l	7,4(3)		C r7 = res limb
    63  	sf	8,11,7		C r8 = res_limb - r11; CA = no borrow
    64  	a	11,8,11		C invert cy: CA = borrow (r11 is junk)
    65  	blt	Lneg		C s2_limb has its top bit set: use Lnloop
    66  Lpos:	bdz	Lend		C single limb: store it and return
    67  
    68  Lploop:	lu	0,4(4)		C r0 = next s1 limb, s1_ptr += 4
    69  	stu	8,4(3)		C store previous result limb, res_ptr += 4
    70  	cmpi	0,0,0		C cr0 = sign of s1 limb, tested by bge below
    71  	mul	10,0,6		C r10 = high half of signed product, MQ = low half
    72  	mfmq	0		C r0 = low half (s1 limb itself is not needed here)
    73  	ae	11,0,9		C low limb + old_cy_limb + old cy
    74  	l	7,4(3)		C r7 = res limb
    75  	aze	10,10		C propagate cy to new cy_limb
    76  	sf	8,11,7		C r8 = res_limb - r11; CA = no borrow
    77  	a	11,8,11		C invert cy: CA = borrow (r11 is junk)
    78  	bge	Lp0		C s1 limb non-negative: no adjustment needed
    79  	cax	10,10,6		C adjust high limb for negative limb from s1
    80  Lp0:	bdz	Lend0		C done, carry limb is in r10
    81  	lu	0,4(4)		C second unrolled half: r9 and r10 swap roles
    82  	stu	8,4(3)		C store previous result limb, res_ptr += 4
    83  	cmpi	0,0,0		C cr0 = sign of s1 limb, tested by bge below
    84  	mul	9,0,6		C r9 = high half of signed product, MQ = low half
    85  	mfmq	0		C r0 = low half of product
    86  	ae	11,0,10		C low limb + old_cy_limb + old cy
    87  	l	7,4(3)		C r7 = res limb
    88  	aze	9,9		C propagate cy to new cy_limb
    89  	sf	8,11,7		C r8 = res_limb - r11; CA = no borrow
    90  	a	11,8,11		C invert cy: CA = borrow (r11 is junk)
    91  	bge	Lp1		C s1 limb non-negative: no adjustment needed
    92  	cax	9,9,6		C adjust high limb for negative limb from s1
    93  Lp1:	bdn	Lploop		C loop while limbs remain
    94  
    95  	b	Lend		C done, carry limb is already in r9
    96  
    97  Lneg:	cax	9,9,0		C adjust high limb for negative s2_limb: add s1 limb
    98  	bdz	Lend		C single limb: store it and return
    99  Lnloop:	lu	0,4(4)		C r0 = next s1 limb, s1_ptr += 4
   100  	stu	8,4(3)		C store previous result limb, res_ptr += 4
   101  	cmpi	0,0,0		C cr0 = sign of s1 limb, tested by bge below
   102  	mul	10,0,6		C r10 = high half of signed product, MQ = low half
   103  	mfmq	7		C r7 = low half; r0 keeps the s1 limb for the ae below
   104  	ae	11,7,9		C low limb + old_cy_limb + old cy
   105  	l	7,4(3)		C r7 = res limb
   106  	ae	10,10,0		C propagate cy to new cy_limb, adding s1 limb to adjust for negative s2_limb
   107  	sf	8,11,7		C r8 = res_limb - r11; CA = no borrow
   108  	a	11,8,11		C invert cy: CA = borrow (r11 is junk)
   109  	bge	Ln0		C s1 limb non-negative: no further adjustment
   110  	cax	10,10,6		C adjust high limb for negative limb from s1
   111  Ln0:	bdz	Lend0		C done, carry limb is in r10
   112  	lu	0,4(4)		C second unrolled half: r9 and r10 swap roles
   113  	stu	8,4(3)		C store previous result limb, res_ptr += 4
   114  	cmpi	0,0,0		C cr0 = sign of s1 limb, tested by bge below
   115  	mul	9,0,6		C r9 = high half of signed product, MQ = low half
   116  	mfmq	7		C r7 = low half; r0 keeps the s1 limb for the ae below
   117  	ae	11,7,10		C low limb + old_cy_limb + old cy
   118  	l	7,4(3)		C r7 = res limb
   119  	ae	9,9,0		C propagate cy to new cy_limb, adding s1 limb to adjust for negative s2_limb
   120  	sf	8,11,7		C r8 = res_limb - r11; CA = no borrow
   121  	a	11,8,11		C invert cy: CA = borrow (r11 is junk)
   122  	bge	Ln1		C s1 limb non-negative: no further adjustment
   123  	cax	9,9,6		C adjust high limb for negative limb from s1
   124  Ln1:	bdn	Lnloop		C loop while limbs remain
   125  	b	Lend		C done, carry limb is already in r9
   126  
   127  Lend0:	cal	9,0(10)		C r9 = r10, the carry limb left by the first half
   128  Lend:	st	8,4(3)		C store the final result limb
   129  	aze	3,9		C r3 = carry limb + final borrow
   130  	br			C return to caller
   131  EPILOGUE(mpn_submul_1)