github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/arm/v6/addmul_3.asm (about) 1 dnl ARM mpn_addmul_3. 2 3 dnl Contributed to the GNU project by Torbjörn Granlund. 4 5 dnl Copyright 2013 Free Software Foundation, Inc. 6 7 dnl This file is part of the GNU MP Library. 8 dnl 9 dnl The GNU MP Library is free software; you can redistribute it and/or modify 10 dnl it under the terms of either: 11 dnl 12 dnl * the GNU Lesser General Public License as published by the Free 13 dnl Software Foundation; either version 3 of the License, or (at your 14 dnl option) any later version. 15 dnl 16 dnl or 17 dnl 18 dnl * the GNU General Public License as published by the Free Software 19 dnl Foundation; either version 2 of the License, or (at your option) any 20 dnl later version. 21 dnl 22 dnl or both in parallel, as here. 23 dnl 24 dnl The GNU MP Library is distributed in the hope that it will be useful, but 25 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 26 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 27 dnl for more details. 28 dnl 29 dnl You should have received copies of the GNU General Public License and the 30 dnl GNU Lesser General Public License along with the GNU MP Library. If not, 31 dnl see https://www.gnu.org/licenses/. 32 33 include(`../config.m4') 34 35 C cycles/limb 36 C StrongARM: - 37 C XScale - 38 C ARM11 4.33 39 C Cortex-A7 3.23 40 C Cortex-A8 3.19 41 C Cortex-A9 2.125 42 C Cortex-A15 2 43 44 C TODO 45 C * Use a fast path for n <= KARATSUBA_MUL_THRESHOLD using a jump table, 46 C avoiding the current multiply. 47 C * Start the first multiply or multiplies early. 
C mpn_addmul_3 -- accumulate the product of the n-limb operand at up and the
C three limbs at vp into the limbs at rp, built around the umaal instruction.
C
C Register roles (see the defines below):
C   rp   r0        result pointer, read and written in place
C   up   r1        source operand pointer
C   n    r2        limb count; biased by -3 before the loop is entered
C   vp   r3        pointer to the three multiplier limbs; dead once v0-v2
C                  have been loaded, after which r3 is reused as u0
C   v0-v2  r4-r6   multiplier limbs
C   u0,u1  r3,r14  source limbs, used alternately by successive loop stages
C   w0-w2  r7-r9   rotating three-limb window of the result
C   cy0-cy2 r10-r12  one running carry limb per multiplier limb
C
C Result: limbs 0 .. n+1 at rp are stored (the top two are written without
C being read first) and the limb above those is returned in r0.
C
C NOTE(review): the dispatch only looks at n mod 6 and the shortest path
C (b3) handles 3 limbs, so n = 1 or n = 2 would run the 7 and 8 limb paths.
C Presumably callers guarantee n >= 3 -- confirm against the callers.

define(`rp',`r0')
define(`up',`r1')
define(`n', `r2')
define(`vp',`r3')

define(`v0',`r4')
define(`v1',`r5')
define(`v2',`r6')

define(`u0',`r3')
define(`u1',`r14')

define(`w0',`r7')
define(`w1',`r8')
define(`w2',`r9')

define(`cy0',`r10')
define(`cy1',`r11')
define(`cy2',`r12')


ASM_START()
PROLOGUE(mpn_addmul_3)
	push	{ r4-r11, r14 }

	ldr	w0, =0xaaaaaaab		C 3^{-1} mod 2^32
	ldm	vp, { v0,v1,v2 }	C last use of vp; r3 becomes u0
	mov	cy0, #0
	mov	cy1, #0
	mov	cy2, #0

C Dispatch on n mod 6 without a division.  The product n * 3^{-1} mod 2^32
C carries n mod 3 in its top two bits (00, 10, 01 for residues 0, 1, 2, for
C any realistic limb count) and n mod 2 in bit 0.  Keeping just those three
C bits and rotating right by 28 yields 16*(n mod 2) + 4*(top two bits), a
C byte offset into the 7-entry table below.  Top bits 11 cannot occur, which
C is why slot 3 is a hole.
	mul	w0, w0, n		C n * 3^{-1} mod 2^32
	and	w0, w0, #0xc0000001	C keep the mod 3 and mod 2 indicators
	sub	n, n, #3		C bias for the subs/bge loop test
ifdef(`PIC',`
C pc reads as this instruction + 8, that is, the first branch after the nop
	add	pc, pc, w0, ror $28
	nop
	b	L(b0)			C n mod 6 = 0
	b	L(b2)			C n mod 6 = 2
	b	L(b4)			C n mod 6 = 4
	.word	0xe7f000f0		C udf, unreachable slot
	b	L(b3)			C n mod 6 = 3
	b	L(b5)			C n mod 6 = 5
	b	L(b1)			C n mod 6 = 1
',`
C pc reads as this instruction + 8, that is, the first table word
	ldr	pc, [pc, w0, ror $28]
	nop
	.word	L(b0), L(b2), L(b4), 0, L(b3), L(b5), L(b1)
')

C Feed-in blocks.  Each one biases rp and up so that the fixed offsets used
C inside the loop line up, preloads two result limbs and one source limb,
C and then joins the loop at the stage that matches n mod 6.

L(b5):	sub	up, up, #8
	ldr	w1, [rp, #0]
	ldr	w2, [rp, #4]
	ldr	u1, [up, #8]
	b	L(lo5)

L(b4):	sub	rp, rp, #4
	sub	up, up, #12
	ldr	w2, [rp, #4]
	ldr	w0, [rp, #8]
	ldr	u0, [up, #12]
	b	L(lo4)

L(b3):	sub	rp, rp, #8
	sub	up, up, #16
	ldr	w0, [rp, #8]
	ldr	w1, [rp, #12]
	ldr	u1, [up, #16]
	b	L(lo3)

L(b1):	add	rp, rp, #8
	ldr	w2, [rp, #-8]
	ldr	w0, [rp, #-4]
	ldr	u1, [up, #0]
	b	L(lo1)

L(b0):	add	rp, rp, #4
	sub	up, up, #4
	ldr	w0, [rp, #-4]
	ldr	w1, [rp, #0]
	ldr	u0, [up, #4]
	b	L(lo0)

C n mod 6 = 2 falls straight through into the top of the loop.
L(b2):	add	rp, rp, #12
	add	up, up, #4
	ldr	w1, [rp, #-12]
	ldr	w2, [rp, #-8]
	ldr	u0, [up, #-4]

C Main loop, six source limbs per pass.  Every stage handles one source limb
C in the same pattern: load the next result limb into the free window
C register, fold u*v0, u*v1 and u*v2 into the three window limbs (each umaal
C computes hi:lo = u*v + lo + hi, with its own carry register as hi), store
C the now complete lowest window limb, and fetch the source limb for the
C following stage.  The roles of w0/w1/w2 rotate from stage to stage and
C u0/u1 alternate.
	ALIGN(16)
L(top):	ldr	w0, [rp, #-4]
	umaal	w1, cy0, u0, v0
	ldr	u1, [up, #0]
	umaal	w2, cy1, u0, v1
	str	w1, [rp, #-12]
	umaal	w0, cy2, u0, v2
L(lo1):	ldr	w1, [rp, #0]
	umaal	w2, cy0, u1, v0
	ldr	u0, [up, #4]
	umaal	w0, cy1, u1, v1
	str	w2, [rp, #-8]
	umaal	w1, cy2, u1, v2
L(lo0):	ldr	w2, [rp, #4]
	umaal	w0, cy0, u0, v0
	ldr	u1, [up, #8]
	umaal	w1, cy1, u0, v1
	str	w0, [rp, #-4]
	umaal	w2, cy2, u0, v2
L(lo5):	ldr	w0, [rp, #8]
	umaal	w1, cy0, u1, v0
	ldr	u0, [up, #12]
	umaal	w2, cy1, u1, v1
	str	w1, [rp, #0]
	umaal	w0, cy2, u1, v2
L(lo4):	ldr	w1, [rp, #12]
	umaal	w2, cy0, u0, v0
	ldr	u1, [up, #16]
	umaal	w0, cy1, u0, v1
	str	w2, [rp, #4]
	umaal	w1, cy2, u0, v2
L(lo3):	ldr	w2, [rp, #16]
	umaal	w0, cy0, u1, v0
	ldr	u0, [up, #20]
	umaal	w1, cy1, u1, v1
	str	w0, [rp, #8]
	umaal	w2, cy2, u1, v2
L(lo2):	subs	n, n, #6		C another full pass while n stays >= 0
	add	up, up, #24
	add	rp, rp, #24
	bge	L(top)

C Tail: the final two source limbs.  No further result limbs are loaded; the
C window is topped up with zero (w0) and with the carry register itself
C (cy1 acts as the low accumulator of the last umaal), and the three carry
C limbs are then summed into the two top stored limbs and the return value.
L(end):	umaal	w1, cy0, u0, v0
	ldr	u1, [up, #0]		C last source limb
	umaal	w2, cy1, u0, v1
	str	w1, [rp, #-12]
	mov	w0, #0			C nothing is read from rp at this position
	umaal	w0, cy2, u0, v2
	umaal	w2, cy0, u1, v0
	umaal	w0, cy1, u1, v1
	str	w2, [rp, #-8]
	umaal	cy1, cy2, u1, v2	C cy2:cy1 = u1*v2 + cy1 + cy2
	adds	w0, w0, cy0
	str	w0, [rp, #-4]
	adcs	w1, cy1, #0
	str	w1, [rp, #0]
	adc	r0, cy2, #0		C return value: top limb plus final carry

	pop	{ r4-r11, pc }
EPILOGUE()