dnl  ARM v6t2 mpn_divrem_1 and mpn_preinv_divrem_1.

dnl  Contributed to the GNU project by Torbjörn Granlund.

dnl  Copyright 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C		norm	unorm	frac
C StrongARM	 -	 -	 -
C XScale	 -	 -	 -
C Cortex-A7	 ?	 ?	 ?
C Cortex-A8	 ?	 ?	 ?
C Cortex-A9	13	14	13
C Cortex-A15	11.4	11.8	11.1

C TODO
C  * Optimise inner-loops better, they could likely run a cycle or two faster.
C  * Decrease register usage, streamline non-loop code.

C Overview
C  Two entry points share one body.  Both divide the limb vector at up_arg
C  (n_arg limbs) by the single limb d, then develop fn further quotient limbs
C  from the running remainder with zero limbs as input.  Quotient limbs are
C  stored from the high end of the quotient area downwards, and the final
C  remainder is returned in r0.
C
C  Incoming arguments, as read by the code below:
C    r0        quotient pointer
C    r1        fn, number of fraction limbs
C    r2        source pointer
C    r3        number of source limbs
C    [sp, #0]  d, the divisor
C    [sp, #4]  dinv, precomputed inverse      (mpn_preinv_divrem_1 only)
C    [sp, #8]  cnt, normalisation shift count (mpn_preinv_divrem_1 only)
C
C  The stack offsets above are relative to sp on entry.  After the nine
C  registers are pushed the code adds 9*4 to reach them.
C
C  Working registers after the prologue:
C    n    (r9)   source limb count minus one
C    qp   (r5)   quotient store pointer, walks downwards
C    up   (r6)   source load pointer, walks downwards
C    cnt  (r7)   left shift that normalises d
C    tnc  (r10)  32 - cnt
C    dinv (r0)   inverse of the normalised divisor
C    d    (r4)   divisor, shifted left by cnt on the unnormalised path
C
C  NOTE(review): mpn_invert_limb is defined elsewhere.  This code relies on it
C  taking its operand in r0, returning the inverse in r0 and preserving
C  r4-r11.  Confirm against its definition.

C Argument registers and stack offsets
define(`qp_arg',  `r0')
define(`fn',      `r1')
define(`up_arg',  `r2')
define(`n_arg',   `r3')
define(`d_arg',   `0')
define(`dinv_arg',`4')
define(`cnt_arg', `8')

C Working registers
define(`n',    `r9')
define(`qp',   `r5')
define(`up',   `r6')
define(`cnt',  `r7')
define(`tnc',  `r10')
define(`dinv', `r0')
define(`d',    `r4')

ASM_START()

C mpn_preinv_divrem_1
C  Entry point for callers that supply dinv and cnt on the stack.  It sets up
C  the working registers and then joins the shared code at L(nent) when the
C  top bit of d is set, or at L(uent) otherwise.
PROLOGUE(mpn_preinv_divrem_1)
	stmfd	sp!, {r4, r5, r6, r7, r8, r9, r10, r11, lr}
	ldr	d,   [sp, #9*4+d_arg]
	ldr	cnt, [sp, #9*4+cnt_arg]
	str	r1, [sp, #9*4+d_arg]	C reuse d stack slot for fn
	sub	n, r3, #1
	add	r3, r1, n
	cmp	d, #0			C negative means top bit of d is set
	add	qp, qp_arg, r3, lsl #2	C put qp at Q[] end
	add	up, up_arg, n, lsl #2	C put up at U[] end
	ldr	dinv, [sp, #9*4+dinv_arg]	C overwrites r0, qp_arg already consumed
	blt	L(nent)
	b	L(uent)
EPILOGUE()

C mpn_divrem_1
C  Entry point that computes the inverse itself by calling mpn_invert_limb.
C  The remainder is returned in r0.
PROLOGUE(mpn_divrem_1)
	stmfd	sp!, {r4, r5, r6, r7, r8, r9, r10, r11, lr}
	sub	n, r3, #1
	ldr	d, [sp, #9*4+d_arg]	C d
	str	r1, [sp, #9*4+d_arg]	C reuse d stack slot for fn
	add	r3, r1, n
	cmp	d, #0			C negative means top bit of d is set
	add	qp, qp_arg, r3, lsl #2	C put qp at Q[] end
	add	up, up_arg, n, lsl #2	C put up at U[] end
	blt	L(normalised)

C Unnormalised divisor: shift d left by its leading zero count and shift the
C source limbs by the same amount as they are consumed.
L(unnorm):
	clz	cnt, d
	mov	r0, d, lsl cnt		C pass d << cnt
	bl	mpn_invert_limb
L(uent):
	mov	d, d, lsl cnt		C d <<= cnt
	cmp	n, #0			C flags stay live until the beq below
	mov	r1, #0			C r
	blt	L(frac)			C no source limbs, only fraction limbs

	ldr	r11, [up, #0]		C most significant source limb

	rsb	tnc, cnt, #32
	mov	r1, r11, lsr tnc	C initial remainder, bits shifted out the top
	mov	r11, r11, lsl cnt
	beq	L(uend)			C single source limb

	ldr	r3, [up, #-4]!
	orr	r2, r11, r3, lsr tnc	C first normalised dividend limb
	b	L(mid)

C Main unnormalised loop.  It is entered at L(mid).  The multiply for one
C quotient limb is issued at L(mid) and its remainder and adjustment are
C completed at the top of the next pass.
C   r1 remainder, r8 quotient limb, r2 low product then next dividend limb,
C   r11 dividend limb of the step in flight, r3 last loaded source limb.
L(utop):
	mls	r1, d, r8, r11		C r = u0 - q*d
	mov	r11, r3, lsl cnt
	ldr	r3, [up, #-4]!
	cmp	r1, r2			C must precede the orr that overwrites r2
	addhi	r1, r1, d
	subhi	r8, r8, #1
	orr	r2, r11, r3, lsr tnc	C next normalised dividend limb
	cmp	r1, d
	bcs	L(ufx)			C r >= d, fix up out of line
L(uok):	str	r8, [qp], #-4
L(mid):	add	r8, r1, #1
	mov	r11, r2
	umlal	r2, r8, r1, dinv	C r8:r2 += r * dinv
	subs	n, n, #1
	bne	L(utop)

C Finish the quotient limb left in flight by the loop, with no further load.
	mls	r1, d, r8, r11
	mov	r11, r3, lsl cnt	C last source limb, low bits zero filled
	cmp	r1, r2
	addhi	r1, r1, d
	subhi	r8, r8, #1
	cmp	r1, d
	rsbcs	r1, d, r1
	addcs	r8, r8, #1
	str	r8, [qp], #-4

C Final integer quotient limb, dividing r1:r11 by d.
L(uend):add	r8, r1, #1
	mov	r2, r11
	umlal	r2, r8, r1, dinv
	mls	r1, d, r8, r11
	cmp	r1, r2
	addhi	r1, r1, d
	subhi	r8, r8, #1
	cmp	r1, d
	rsbcs	r1, d, r1
	addcs	r8, r8, #1
	str	r8, [qp], #-4

C Fraction limbs: fn further quotient limbs with a zero low dividend limb.
C Both paths arrive here with the remainder in r1.
L(frac):
	ldr	r2, [sp, #9*4+d_arg]	C fn
	cmp	r2, #0
	beq	L(fend)

C r6 is the up register, reused here as scratch.
L(ftop):mov	r6, #0
	add	r3, r1, #1
	umlal	r6, r3, r1, dinv	C r3:r6 = r * dinv + (r+1):0
	mov	r8, #0
	mls	r1, d, r3, r8		C r = 0 - q*d
	cmp	r1, r6
	addhi	r1, r1, d
	subhi	r3, r3, #1
	subs	r2, r2, #1
	str	r3, [qp], #-4
	bne	L(ftop)

L(fend):mov	r11, r1, lsr cnt	C undo the normalisation shift
L(rtn):	mov	r0, r11			C return value
	ldmfd	sp!, {r4, r5, r6, r7, r8, r9, r10, r11, pc}

C Normalised divisor, top bit of d already set, so no shifting is needed.
L(normalised):
	mov	r0, d
	bl	mpn_invert_limb
L(nent):
	cmp	n, #0
	mov	r11, #0			C r
	blt	L(nend)			C no source limbs

	ldr	r11, [up, #0]		C most significant source limb
	cmp	r11, d
	movlo	r2, #0			C hi q limb
	movhs	r2, #1			C hi q limb
	subhs	r11, r11, d

	str	r2, [qp], #-4
	cmp	n, #0			C redo, flags were overwritten by cmp r11, d
	beq	L(nend)

C Main normalised loop.  r11 remainder, r12 quotient limb, r1 low product.
L(ntop):ldr	r1, [up, #-4]!
	add	r12, r11, #1
	umlal	r1, r12, r11, dinv	C r12:r1 += r * dinv
	ldr	r3, [up, #0]		C reload the limb, r1 was overwritten
	mls	r11, d, r12, r3		C r = u0 - q*d
	cmp	r11, r1
	addhi	r11, r11, d
	subhi	r12, r12, #1
	cmp	d, r11
	bls	L(nfx)			C r >= d, fix up out of line
L(nok):	str	r12, [qp], #-4
	subs	n, n, #1
	bne	L(ntop)

L(nend):mov	r1, r11			C r
	mov	cnt, #0			C shift cnt
	b	L(frac)

C Out of line fix-ups for the case where the remainder is still >= d.
L(nfx):	add	r12, r12, #1
	rsb	r11, d, r11
	b	L(nok)
L(ufx):	rsb	r1, d, r1
	add	r8, r8, #1
	b	L(uok)
EPILOGUE()