github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/power/mul_1.asm (about)

     1  dnl  IBM POWER mpn_mul_1 -- Multiply a limb vector with a limb and store the
     2  dnl  result in a second limb vector.
     3  
     4  dnl  Copyright 1992, 1994, 1999-2001 Free Software Foundation, Inc.
     5  
     6  dnl  This file is part of the GNU MP Library.
     7  dnl
     8  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     9  dnl  it under the terms of either:
    10  dnl
    11  dnl    * the GNU Lesser General Public License as published by the Free
    12  dnl      Software Foundation; either version 3 of the License, or (at your
    13  dnl      option) any later version.
    14  dnl
    15  dnl  or
    16  dnl
    17  dnl    * the GNU General Public License as published by the Free Software
    18  dnl      Foundation; either version 2 of the License, or (at your option) any
    19  dnl      later version.
    20  dnl
    21  dnl  or both in parallel, as here.
    22  dnl
    23  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    24  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    25  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    26  dnl  for more details.
    27  dnl
    28  dnl  You should have received copies of the GNU General Public License and the
    29  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    30  dnl  see https://www.gnu.org/licenses/.
    31  
    32  
    33  dnl  INPUT PARAMETERS
    34  dnl  res_ptr	r3
    35  dnl  s1_ptr	r4
    36  dnl  size	r5
    37  dnl  s2_limb	r6
    38  
    39  dnl  The POWER architecture has no unsigned 32x32->64 bit multiplication
    40  dnl  instruction.  To obtain that operation, we have to use the 32x32->64
    41  dnl  signed multiplication instruction, and add the appropriate compensation to
    42  dnl  the high limb of the result.  We add the multiplicand if the multiplier
    43  dnl  has its most significant bit set, and we add the multiplier if the
    44  dnl  multiplicand has its most significant bit set.  We need to preserve the
    45  dnl  carry flag between each iteration, so we have to compute the compensation
    46  dnl  carefully (the natural srai+and sequence doesn't work).  Since all POWER can
    47  dnl  branch in zero cycles, we use conditional branches for the compensation.
    48  
    49  include(`../config.m4')
    50  
    51  ASM_START()
    52  PROLOGUE(mpn_mul_1)		C in: r3=res_ptr r4=s1_ptr r5=size r6=s2_limb; the final high limb is left in r3
    53  	cal	3,-4(3)		C bias res_ptr by -4 so that every store below can use offset 4
    54  	l	0,0(4)		C r0 = first limb of s1
    55  	cmpi	0,6,0		C cr0 = signed compare of s2_limb with 0, consumed by blt below
    56  	mtctr	5		C CTR = size (number of limbs to process)
    57  	mul	9,0,6		C signed multiply: high word in r9, low word in MQ
    58  	srai	7,0,31		C r7 = all ones if the s1 limb has its top bit set, else 0
    59  	and	7,7,6		C r7 = s2_limb if the s1 limb is negative, else 0
    60  	mfmq	8		C r8 = low word of the product
    61  	ai	0,0,0		C reset carry (adding 0 leaves r0 intact; srai above may have set carry)
    62  	cax	9,9,7		C compensate high word for a negative s1 limb; cax leaves carry alone
    63  	blt	Lneg		C s2_limb has its top bit set: use the variant that also adds the s1 limb
    64  Lpos:	bdz	Lend		C decrement CTR; a single limb means we are already done
    65  Lploop:	lu	0,4(4)		C r0 = next s1 limb, s1_ptr advanced by 4
    66  	stu	8,4(3)		C store the pending low limb, res_ptr advanced by 4
    67  	cmpi	0,0,0		C cr0 = sign of the s1 limb, consumed by bge below
    68  	mul	10,0,6		C high word in r10, low word in MQ
    69  	mfmq	0		C r0 = low word (the s1 limb itself is no longer needed)
    70  	ae	8,0,9		C r8 = low word + previous high word (r9) + carry; carry updated
    71  	bge	Lp0		C s1 limb non-negative: no compensation needed
    72  	cax	10,10,6		C adjust high limb for negative limb from s1
    73  Lp0:	bdz	Lend0		C out of limbs with the high word in r10; Lend0 copies it to r9
    74  	lu	0,4(4)		C second unrolled copy: same steps with r9 and r10 swapped
    75  	stu	8,4(3)
    76  	cmpi	0,0,0
    77  	mul	9,0,6		C high word in r9, low word in MQ
    78  	mfmq	0
    79  	ae	8,0,10		C r8 = low word + previous high word (r10) + carry
    80  	bge	Lp1
    81  	cax	9,9,6		C adjust high limb for negative limb from s1
    82  Lp1:	bdn	Lploop		C decrement CTR; loop while limbs remain
    83  	b	Lend		C out of limbs with the high word already in r9
    84  C Variant for s2_limb with its top bit set: every high word also gets the s1 limb added.
    85  Lneg:	cax	9,9,0		C first limb: r0 still holds the s1 limb, add it to the high word
    86  	bdz	Lend		C decrement CTR; a single limb means we are already done
    87  Lnloop:	lu	0,4(4)		C r0 = next s1 limb, s1_ptr advanced by 4
    88  	stu	8,4(3)		C store the pending low limb, res_ptr advanced by 4
    89  	cmpi	0,0,0		C cr0 = sign of the s1 limb, consumed by bge below
    90  	mul	10,0,6		C high word in r10, low word in MQ
    91  	cax	10,10,0		C adjust high limb for negative s2_limb
    92  	mfmq	0		C r0 = low word; must follow the cax above, which reads the s1 limb from r0
    93  	ae	8,0,9		C r8 = low word + previous high word (r9) + carry; carry updated
    94  	bge	Ln0		C s1 limb non-negative: no further compensation
    95  	cax	10,10,6		C adjust high limb for negative limb from s1
    96  Ln0:	bdz	Lend0		C out of limbs with the high word in r10; Lend0 copies it to r9
    97  	lu	0,4(4)		C second unrolled copy: same steps with r9 and r10 swapped
    98  	stu	8,4(3)
    99  	cmpi	0,0,0
   100  	mul	9,0,6		C high word in r9, low word in MQ
   101  	cax	9,9,0		C adjust high limb for negative s2_limb
   102  	mfmq	0
   103  	ae	8,0,10		C r8 = low word + previous high word (r10) + carry
   104  	bge	Ln1
   105  	cax	9,9,6		C adjust high limb for negative limb from s1
   106  Ln1:	bdn	Lnloop		C decrement CTR; loop while limbs remain
   107  	b	Lend		C out of limbs with the high word already in r9
   108  C Common exit: on reaching Lend, r8 = last low limb and r9 = last high word.
   109  Lend0:	cal	9,0(10)		C r9 = r10 (entered when the final high word was computed into r10)
   110  Lend:	st	8,4(3)		C store the final low limb
   111  	aze	3,9		C r3 = r9 + carry: the most significant limb, presumably the return value
   112  	br			C return through the link register
   113  EPILOGUE(mpn_mul_1)