dnl  github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/power/submul_1.asm (about)

dnl  IBM POWER mpn_submul_1 -- Multiply a limb vector with a limb and subtract
dnl  the result from a second limb vector.

dnl  Copyright 1992, 1994, 1999-2001 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.


dnl  INPUT PARAMETERS
dnl  res_ptr	r3
dnl  s1_ptr	r4
dnl  size	r5
dnl  s2_limb	r6

dnl  The POWER architecture has no unsigned 32x32->64 bit multiplication
dnl  instruction.  To obtain that operation, we have to use the 32x32->64
dnl  signed multiplication instruction, and add the appropriate compensation to
dnl  the high limb of the result.  We add the multiplicand if the multiplier
dnl  has its most significant bit set, and we add the multiplier if the
dnl  multiplicand has its most significant bit set.
dnl  We need to preserve the carry flag between each iteration, so we have to
dnl  compute the compensation carefully (the natural, srai+and doesn't work).
dnl  Since all POWER can branch in zero cycles, we use conditional branches for
dnl  the compensation.

dnl  REGISTER USAGE (as read from the code below)
dnl  r0	current s1 limb; in the Lpos path it is then reused for the low
dnl	product word, in the Lneg path it stays the s1 limb
dnl  r3	res_ptr biased by -4 and advanced by stu; finally the value left
dnl	for the caller (cy_limb + borrow)
dnl  r4	s1_ptr, advanced by lu
dnl  r5	size, moved to CTR as the loop counter
dnl  r6	s2_limb (the multiplier), never modified
dnl  r7	scratch: sign mask, res limb, and in the Lneg path the low product
dnl  r8	result limb waiting to be stored
dnl  r9,r10	high product word + compensation (cy_limb); the two registers
dnl	swap roles between the two halves of each unrolled loop
dnl  r11	value subtracted from the res limb; junk after the carry inversion
dnl  cr0	sign test of s2_limb (entry) or of the current s1 limb (loops)
dnl
dnl  NOTE(review): the first limb is processed before CTR is ever tested, so
dnl  size is presumably required to be at least 1 -- confirm against callers.

include(`../config.m4')

ASM_START()
PROLOGUE(mpn_submul_1)
C First limb.  No carry is live yet, so the srai+and form of the
C compensation can still be used here.
	cal	3,-4(3)		C bias res_ptr so that 4(3) is the current limb
	l	0,0(4)		C r0 = first s1 limb
	cmpi	0,6,0		C cr0 = sign of s2_limb, consumed by blt Lneg
	mtctr	5		C CTR = size
	mul	9,0,6		C signed product: high word in r9, low word in MQ
	srai	7,0,31		C r7 = all ones if the s1 limb has its msb set
	and	7,7,6		C r7 = s2_limb if the s1 limb is negative, else 0
	mfmq	11		C r11 = low product word
	cax	9,9,7		C adjust high limb for negative limb from s1
	l	7,4(3)		C r7 = res limb
	sf	8,11,7		C r8 = res_limb - low product (subtract from)
	a	11,8,11		C invert cy (r11 is junk)
	blt	Lneg		C negative s2_limb: take the path that also adds s1 limbs
Lpos:	bdz	Lend		C size was 1: only the store and return remain

C Loop for s2_limb with its msb clear.  Unrolled twice; the first half
C builds cy_limb in r10, the second half in r9.
Lploop:	lu	0,4(4)		C r0 = next s1 limb, s1_ptr advances
	stu	8,4(3)		C store previous result limb, res_ptr advances
	cmpi	0,0,0		C cr0 = sign of the s1 limb, consumed by bge Lp0
	mul	10,0,6		C high word in r10, low word in MQ
	mfmq	0		C r0 = low product word (the s1 limb is no longer needed)
	ae	11,0,9		C low limb + old_cy_limb + old cy
	l	7,4(3)		C r7 = res limb
	aze	10,10		C propagate cy to new cy_limb
	sf	8,11,7		C r8 = res_limb - r11 (subtract from)
	a	11,8,11		C invert cy (r11 is junk)
	bge	Lp0		C s1 limb not negative: no compensation
	cax	10,10,6		C adjust high limb for negative limb from s1
Lp0:	bdz	Lend0		C done with cy_limb in r10; Lend0 moves it to r9
	lu	0,4(4)		C second half: same steps with r9 and r10 swapped
	stu	8,4(3)
	cmpi	0,0,0
	mul	9,0,6
	mfmq	0
	ae	11,0,10		C low limb + old_cy_limb + old cy
	l	7,4(3)
	aze	9,9		C propagate cy to new cy_limb
	sf	8,11,7		C r8 = res_limb - r11 (subtract from)
	a	11,8,11		C invert cy (r11 is junk)
	bge	Lp1
	cax	9,9,6		C adjust high limb for negative limb from s1
Lp1:	bdn	Lploop		C more limbs: continue

	b	Lend		C done with cy_limb already in r9

C Path for s2_limb with its msb set: every high limb additionally gets the
C s1 limb (the multiplicand) added, as described in the header comment.
Lneg:	cax	9,9,0		C first limb: add the s1 limb to the high limb
	bdz	Lend		C size was 1: only the store and return remain
Lnloop:	lu	0,4(4)		C r0 = next s1 limb, s1_ptr advances
	stu	8,4(3)		C store previous result limb, res_ptr advances
	cmpi	0,0,0		C cr0 = sign of the s1 limb, consumed by bge Ln0
	mul	10,0,6		C high word in r10, low word in MQ
	mfmq	7		C low product word goes to r7 so r0 keeps the s1 limb
	ae	11,7,9		C low limb + old_cy_limb + old cy
	l	7,4(3)		C r7 = res limb
	ae	10,10,0		C propagate cy to new cy_limb, adding the s1 limb too
	sf	8,11,7		C r8 = res_limb - r11 (subtract from)
	a	11,8,11		C invert cy (r11 is junk)
	bge	Ln0		C s1 limb not negative: no further compensation
	cax	10,10,6		C adjust high limb for negative limb from s1
Ln0:	bdz	Lend0		C done with cy_limb in r10; Lend0 moves it to r9
	lu	0,4(4)		C second half: same steps with r9 and r10 swapped
	stu	8,4(3)
	cmpi	0,0,0
	mul	9,0,6
	mfmq	7
	ae	11,7,10		C low limb + old_cy_limb + old cy
	l	7,4(3)
	ae	9,9,0		C propagate cy to new cy_limb, adding the s1 limb too
	sf	8,11,7		C r8 = res_limb - r11 (subtract from)
	a	11,8,11		C invert cy (r11 is junk)
	bge	Ln1
	cax	9,9,6		C adjust high limb for negative limb from s1
Ln1:	bdn	Lnloop		C more limbs: continue
	b	Lend		C done with cy_limb already in r9

C Common exit.  Lend expects the final cy_limb in r9.
Lend0:	cal	9,0(10)		C r9 = r10
Lend:	st	8,4(3)		C store the last result limb
	aze	3,9		C r3 = cy_limb + final cy, left for the caller
	br
EPILOGUE(mpn_submul_1)