dnl  Source: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/power/addmul_1.asm

dnl  IBM POWER mpn_addmul_1 -- Multiply a limb vector with a limb and add the
dnl  result to a second limb vector.

dnl  Copyright 1992, 1994, 1999-2001 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.


dnl  INPUT PARAMETERS
dnl  res_ptr	r3
dnl  s1_ptr	r4
dnl  size	r5
dnl  s2_limb	r6

dnl  The POWER architecture has no unsigned 32x32->64 bit multiplication
dnl  instruction.  To obtain that operation, we have to use the 32x32->64
dnl  signed multiplication instruction, and add the appropriate compensation to
dnl  the high limb of the result.  We add the multiplicand if the multiplier
dnl  has its most significant bit set, and we add the multiplier if the
dnl  multiplicand has its most significant bit set.
dnl  We need to preserve the carry flag between each iteration, so we have to
dnl  compute the compensation carefully (the natural, srai+and doesn't work).
dnl  Since all POWER can branch in zero cycles, we use conditional branches for
dnl  the compensation.

include(`../config.m4')

C mpn_addmul_1: for each of the size limbs, res_ptr[i] += s1_ptr[i] * s2_limb,
C chaining the high product limb and the carry bit into the next position.
C
C Register roles as used below:
C   r0   current s1 limb (Lploop reuses it for the low product limb)
C   r3   res_ptr biased by -4 so that the 4(3) accesses and stu line up;
C        overwritten with the final carry limb just before returning
C   r4   s1_ptr, advanced by lu
C   r6   s2_limb (never modified)
C   r7   scratch: compensation mask, res limb, low product limb in Lnloop
C   r8   low result limb waiting to be stored
C   r9   carry (high) limb, alternating with r10 in the 2-way unrolled loops
C   r10  carry (high) limb, alternating with r9
C   CTR  limbs remaining
C   cr0  sign of s2_limb on entry, later sign of the current s1 limb
C   CA   carry bit, live from one limb to the next
C
C The first limb is loaded and multiplied before CTR is tested, so size is
C expected to be at least 1.

ASM_START()
PROLOGUE(mpn_addmul_1)
        cal     3,-4(3)         C bias res_ptr for the update-form stores
        l       0,0(4)          C r0 = first s1 limb
        cmpi    0,6,0           C cr0 = sign of s2_limb, consumed by blt below
        mtctr   5               C CTR = size
        mul     9,0,6           C signed multiply: r9 = high limb, MQ = low limb
C No carry is live yet, so the first limb may use srai+and for its
C compensation (srai alters CA, which is why the loops use branches instead).
        srai    7,0,31          C r7 = all ones if the s1 limb is negative, else 0
        and     7,7,6           C r7 = s2_limb or 0
        mfmq    8               C r8 = low limb of the product
        cax     9,9,7           C adjust high limb for negative limb from s1
        l       7,4(3)          C r7 = res limb
        a       8,8,7           C add res_limb
        blt     Lneg            C s2_limb negative: use the loop that adds s1 limbs
Lpos:   bdz     Lend            C only one limb: finish

C s2_limb non-negative: only a negative s1 limb needs compensation.
Lploop: lu      0,4(4)          C r0 = next s1 limb, s1_ptr advances
        stu     8,4(3)          C store previous result limb, res_ptr advances
        cmpi    0,0,0           C cr0 = sign of the s1 limb
        mul     10,0,6
        mfmq    0               C r0 = low limb (s1 limb no longer needed here)
        ae      8,0,9           C low limb + old_cy_limb + old cy
        l       7,4(3)
        aze     10,10           C propagate cy to new cy_limb
        a       8,8,7           C add res_limb
        bge     Lp0
        cax     10,10,6         C adjust high limb for negative limb from s1
Lp0:    bdz     Lend0           C carry limb is in r10 on this exit
        lu      0,4(4)          C second unrolled copy, r9 and r10 swapped
        stu     8,4(3)
        cmpi    0,0,0
        mul     9,0,6
        mfmq    0
        ae      8,0,10
        l       7,4(3)
        aze     9,9
        a       8,8,7
        bge     Lp1
        cax     9,9,6           C adjust high limb for negative limb from s1
Lp1:    bdn     Lploop

        b       Lend

C s2_limb negative: every high limb also gets the s1 limb added.  The low
C product goes to r7 so that r0 keeps the s1 limb for that addition.
Lneg:   cax     9,9,0           C adjust first high limb for negative s2_limb
        bdz     Lend
Lnloop: lu      0,4(4)
        stu     8,4(3)
        cmpi    0,0,0
        mul     10,0,6
        mfmq    7
        ae      8,7,9
        l       7,4(3)
        ae      10,10,0         C propagate cy to new cy_limb
        a       8,8,7           C add res_limb
        bge     Ln0
        cax     10,10,6         C adjust high limb for negative limb from s1
Ln0:    bdz     Lend0           C carry limb is in r10 on this exit
        lu      0,4(4)          C second unrolled copy, r9 and r10 swapped
        stu     8,4(3)
        cmpi    0,0,0
        mul     9,0,6
        mfmq    7
        ae      8,7,10
        l       7,4(3)
        ae      9,9,0           C propagate cy to new cy_limb
        a       8,8,7           C add res_limb
        bge     Ln1
        cax     9,9,6           C adjust high limb for negative limb from s1
Ln1:    bdn     Lnloop
        b       Lend

Lend0:  cal     9,0(10)         C mid-loop exit: move the carry limb into r9
Lend:   st      8,4(3)          C store the last result limb
        aze     3,9             C r3 = carry limb + final carry bit (presumably the return value)
        br
EPILOGUE(mpn_addmul_1)