github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/power/addmul_1.asm (about)

     1  dnl  IBM POWER mpn_addmul_1 -- Multiply a limb vector with a limb and add the
     2  dnl  result to a second limb vector.
     3  
     4  dnl  Copyright 1992, 1994, 1999-2001 Free Software Foundation, Inc.
     5  
     6  dnl  This file is part of the GNU MP Library.
     7  dnl
     8  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     9  dnl  it under the terms of either:
    10  dnl
    11  dnl    * the GNU Lesser General Public License as published by the Free
    12  dnl      Software Foundation; either version 3 of the License, or (at your
    13  dnl      option) any later version.
    14  dnl
    15  dnl  or
    16  dnl
    17  dnl    * the GNU General Public License as published by the Free Software
    18  dnl      Foundation; either version 2 of the License, or (at your option) any
    19  dnl      later version.
    20  dnl
    21  dnl  or both in parallel, as here.
    22  dnl
    23  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    24  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    25  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    26  dnl  for more details.
    27  dnl
    28  dnl  You should have received copies of the GNU General Public License and the
    29  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    30  dnl  see https://www.gnu.org/licenses/.
    31  
    32  
    33  dnl  INPUT PARAMETERS
    34  dnl  res_ptr	r3
    35  dnl  s1_ptr	r4
    36  dnl  size	r5
    37  dnl  s2_limb	r6
    38  
    39  dnl  The POWER architecture has no unsigned 32x32->64 bit multiplication
    40  dnl  instruction.  To obtain that operation, we have to use the 32x32->64
    41  dnl  signed multiplication instruction, and add the appropriate compensation to
    42  dnl  the high limb of the result.  We add the multiplicand if the multiplier
    43  dnl  has its most significant bit set, and we add the multiplier if the
    44  dnl  multiplicand has its most significant bit set.  We need to preserve the
    45  dnl  carry flag between each iteration, so we have to compute the compensation
    46  dnl  carefully (the natural, srai+and doesn't work).  Since all POWER can
    47  dnl  branch in zero cycles, we use conditional branches for the compensation.
    48  
    49  include(`../config.m4')
    50  
    51  ASM_START()
    52  PROLOGUE(mpn_addmul_1)
    53  	cal	3,-4(3)
    54  	l	0,0(4)
    55  	cmpi	0,6,0
    56  	mtctr	5
    57  	mul	9,0,6
    58  	srai	7,0,31
    59  	and	7,7,6
    60  	mfmq	8
    61  	cax	9,9,7
    62  	l	7,4(3)
    63  	a	8,8,7		C add res_limb
    64  	blt	Lneg
    65  Lpos:	bdz	Lend
    66  
    67  Lploop:	lu	0,4(4)
    68  	stu	8,4(3)
    69  	cmpi	0,0,0
    70  	mul	10,0,6
    71  	mfmq	0
    72  	ae	8,0,9		C low limb + old_cy_limb + old cy
    73  	l	7,4(3)
    74  	aze	10,10		C propagate cy to new cy_limb
    75  	a	8,8,7		C add res_limb
    76  	bge	Lp0
    77  	cax	10,10,6		C adjust high limb for negative limb from s1
    78  Lp0:	bdz	Lend0
    79  	lu	0,4(4)
    80  	stu	8,4(3)
    81  	cmpi	0,0,0
    82  	mul	9,0,6
    83  	mfmq	0
    84  	ae	8,0,10
    85  	l	7,4(3)
    86  	aze	9,9
    87  	a	8,8,7
    88  	bge	Lp1
    89  	cax	9,9,6		C adjust high limb for negative limb from s1
    90  Lp1:	bdn	Lploop
    91  
    92  	b	Lend
    93  
    94  Lneg:	cax	9,9,0
    95  	bdz	Lend
    96  Lnloop:	lu	0,4(4)
    97  	stu	8,4(3)
    98  	cmpi	0,0,0
    99  	mul	10,0,6
   100  	mfmq	7
   101  	ae	8,7,9
   102  	l	7,4(3)
   103  	ae	10,10,0		C propagate cy to new cy_limb
   104  	a	8,8,7		C add res_limb
   105  	bge	Ln0
   106  	cax	10,10,6		C adjust high limb for negative limb from s1
   107  Ln0:	bdz	Lend0
   108  	lu	0,4(4)
   109  	stu	8,4(3)
   110  	cmpi	0,0,0
   111  	mul	9,0,6
   112  	mfmq	7
   113  	ae	8,7,10
   114  	l	7,4(3)
   115  	ae	9,9,0		C propagate cy to new cy_limb
   116  	a	8,8,7		C add res_limb
   117  	bge	Ln1
   118  	cax	9,9,6		C adjust high limb for negative limb from s1
   119  Ln1:	bdn	Lnloop
   120  	b	Lend
   121  
   122  Lend0:	cal	9,0(10)
   123  Lend:	st	8,4(3)
   124  	aze	3,9
   125  	br
   126  EPILOGUE(mpn_addmul_1)