github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/arm/aorslsh1_n.asm (about) 1 dnl ARM mpn_addlsh1_n and mpn_sublsh1_n 2 3 dnl Contributed to the GNU project by Torbjörn Granlund. 4 5 dnl Copyright 2012 Free Software Foundation, Inc. 6 7 dnl This file is part of the GNU MP Library. 8 dnl 9 dnl The GNU MP Library is free software; you can redistribute it and/or modify 10 dnl it under the terms of either: 11 dnl 12 dnl * the GNU Lesser General Public License as published by the Free 13 dnl Software Foundation; either version 3 of the License, or (at your 14 dnl option) any later version. 15 dnl 16 dnl or 17 dnl 18 dnl * the GNU General Public License as published by the Free Software 19 dnl Foundation; either version 2 of the License, or (at your option) any 20 dnl later version. 21 dnl 22 dnl or both in parallel, as here. 23 dnl 24 dnl The GNU MP Library is distributed in the hope that it will be useful, but 25 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 26 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 27 dnl for more details. 28 dnl 29 dnl You should have received copies of the GNU General Public License and the 30 dnl GNU Lesser General Public License along with the GNU MP Library. If not, 31 dnl see https://www.gnu.org/licenses/. 32 33 include(`../config.m4') 34 35 C addlsh1_n sublsh1_n 36 C cycles/limb cycles/limb 37 C StrongARM ? ? 38 C XScale ? ? 39 C Cortex-A7 ? ? 40 C Cortex-A8 ? ? 41 C Cortex-A9 3.12 3.7 42 C Cortex-A15 ? ? 43 44 C TODO 45 C * The addlsh1_n code runs well, but is only barely faster than mpn_addmul_1. 46 C The sublsh1_n code could surely be tweaked, its REVCY slows down things 47 C very much. If two insns are really needed, it might help to separate them 48 C for better micro-parallelism. 
C ---------------------------------------------------------------------------
C Register names for the four arguments, as the code below uses them:
C   rp (r0)  destination limb pointer (written with stmia/stm/str)
C   up (r1)  first source limb pointer
C   vp (r2)  second source limb pointer; its limbs are doubled before use
C   n  (r3)  limb count
C ---------------------------------------------------------------------------
define(`rp', `r0')
define(`up', `r1')
define(`vp', `r2')
define(`n',  `r3')

C ---------------------------------------------------------------------------
C Operation-specific macros.  Presumably exactly one of OPERATION_addlsh1_n
C and OPERATION_sublsh1_n is defined by the build; the selection is made
C outside this file (TODO confirm).
C
C Two carry chains are interleaved in the function body: the doubling chain
C (adcs rX, rX, rX) and the add/subtract chain (ADDSUBC).  Only one of them
C can live in the carry flag at a time, so the other is parked in a register.
C
C   SAVECY(reg, m1)  park the carry flag in reg
C                      add: reg = m1 - 1 + carry; m1 is r11 = -1, so reg is
C                           -1 when carry is set and -2 when it is clear
C                      sub: reg = carry - 1, so reg is 0 when carry is set
C                           and -1 when it is clear (second argument unused)
C   RESTCY(reg)      cmn reg, #1 sets carry exactly when reg is -1
C                      add: gives back the parked carry unchanged
C                      sub: gives back the complement of the parked carry,
C                           which is the form sbcs consumes (sbcs borrows
C                           when carry is clear)
C   REVCY(reg)       sub only: complement the carry flag, clobbering reg.
C                    This turns the not-borrow left by sbcs into a 0/1
C                    borrow that the next adcs shifts into bit 0 of the
C                    doubled limb.  Empty for add, where the carry left by
C                    adcs is already in that form.
C   INICYR(reg)      initial parked value: no carry in (add: 0) or no
C                    borrow in (sub: -1, so that RESTCY sets carry)
C   RETVAL(reg)      r0 = parked carry as 0/1, plus the carry flag
C                      add: reg + 2 + carry     sub: reg + 1 + carry
C   r10r11           last register of the r4-... range in push/pop; the add
C                    variant extends the range to r11 because it keeps the
C                    constant -1 there
C
C ADDSUB, SETCY and func_nc are defined but not referenced in the code below.
C ---------------------------------------------------------------------------
ifdef(`OPERATION_addlsh1_n', `
  define(`ADDSUB',	adds)
  define(`ADDSUBC',	adcs)
  define(`SETCY',	`cmp	$1, #1')
  define(`RETVAL',	`adc	r0, $1, #2')
  define(`SAVECY',	`sbc	$1, $2, #0')
  define(`RESTCY',	`cmn	$1, #1')
  define(`REVCY',	`')
  define(`INICYR',	`mov	$1, #0')
  define(`r10r11',	`r11')
  define(`func',	mpn_addlsh1_n)
  define(`func_nc',	mpn_addlsh1_nc)')
ifdef(`OPERATION_sublsh1_n', `
  define(`ADDSUB',	subs)
  define(`ADDSUBC',	sbcs)
  define(`SETCY',	`rsbs	$1, $1, #0')
  define(`RETVAL',	`adc	r0, $1, #1')
  define(`SAVECY',	`sbc	$1, $1, $1')
  define(`RESTCY',	`cmn	$1, #1')
  define(`REVCY',	`sbc	$1, $1, $1
	cmn	$1, #1')
  define(`INICYR',	`mvn	$1, #0')
  define(`r10r11',	`r10')
  define(`func',	mpn_sublsh1_n)
  define(`func_nc',	mpn_sublsh1_nc)')

MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)

C ---------------------------------------------------------------------------
C func (mpn_addlsh1_n or mpn_sublsh1_n, per the defines above)
C
C Computes {rp, n} = {up, n} +/- 2 * {vp, n}, three limbs per half of the
C main loop, and returns in r0 the sum of the two outgoing carries: the bit
C shifted out of the last doubled vp limb plus the carry (add) or borrow
C (sub) of the add/subtract chain.
C
C Register roles:
C   r4-r6     limbs loaded from up, overwritten with the result limbs
C   r8-r10    limbs loaded from vp, doubled in place
C   r11       constant -1 for SAVECY (add variant only)
C   r12, r14  alternating parking registers for the doubling-chain carry
C   r7        scratch for swapping r12 and r14 on loop exit
C r12 is used without being pushed; r4 up to r10 or r11, and r14, are saved.
C
C The carry (or borrow) out of the add/subtract of one 3-limb group is not
C parked.  It is shifted straight into bit 0 of the next doubled group by
C the adcs that follows, and the doubling carry parked one group earlier is
C then restored as the carry-in of the next add/subtract.
C
C NOTE(review): n = 0 is not handled specially.  Both tst tests at L(le2)
C then fall through to L(rt0) with r14 still holding the INICYR value, so
C the add variant would return 2.  Presumably callers guarantee n >= 1 --
C confirm.
C ---------------------------------------------------------------------------
ASM_START()
PROLOGUE(func)
	push	{r4-r10r11, r14}

C Add variant only: r11 = -1, the second operand of SAVECY.
ifdef(`OPERATION_addlsh1_n', `
	mvn	r11, #0
')
	INICYR(	r14)			C parked value for no carry/borrow in
	subs	n, n, #3
	blt	L(le2)			C carry clear on branch path

C At least three limbs: enter the loop in its second half with the first
C vp group loaded and a zero bit to shift in.
	cmn	r0, #0			C clear carry
	ldmia	vp!, {r8, r9, r10}
	b	L(mid)

C First half.  In: r4-r6 = up group, r8-r10 = doubled vp group, r14 = parked
C doubling carry belonging to the group before it.
L(top):	RESTCY(	r14)
	ADDSUBC	r4, r4, r8
	ADDSUBC	r5, r5, r9
	ADDSUBC	r6, r6, r10
	ldmia	vp!, {r8, r9, r10}	C next vp group; flags untouched
	stmia	rp!, {r4, r5, r6}
	REVCY(r14)			C sub: not-borrow -> borrow; r14 is free here
	adcs	r8, r8, r8		C double next group, shifting in carry/borrow
	adcs	r9, r9, r9
	adcs	r10, r10, r10
	ldmia	up!, {r4, r5, r6}
	SAVECY(	r14, r11)		C park doubling carry before subs clobbers it
	subs	n, n, #3
	blt	L(exi)			C pending add/sub is finished at L(exi)
C Second half: same steps with the roles of r12 and r14 exchanged.
	RESTCY(	r12)
	ADDSUBC	r4, r4, r8
	ADDSUBC	r5, r5, r9
	ADDSUBC	r6, r6, r10
	ldmia	vp!, {r8, r9, r10}
	stmia	rp!, {r4, r5, r6}
	REVCY(r12)
L(mid):	adcs	r8, r8, r8
	adcs	r9, r9, r9
	adcs	r10, r10, r10
	ldmia	up!, {r4, r5, r6}
	SAVECY(	r12, r11)
	subs	n, n, #3
	bge	L(top)

C Fell out of the second half: exchange r12 and r14 so that L(exi) sees the
C same layout as when it is reached from the first half.
	mov	r7, r12			C swap alternating...
	mov	r12, r14		C ...carry-save...
	mov	r14, r7			C ...registers

C Finish the pending 3-limb add/subtract.  r12 = parked carry to feed it,
C r14 = most recently parked doubling carry.
L(exi):	RESTCY(	r12)
	ADDSUBC	r4, r4, r8
	ADDSUBC	r5, r5, r9
	ADDSUBC	r6, r6, r10
	stmia	rp!, {r4, r5, r6}

	REVCY(r12)			C carry flag = bit to shift into the tail
C Tail of 0, 1 or 2 limbs.  The carry flag set up above (or left clear by the
C initial subs) is consumed by the adcs in L(e02)/L(e1), or by RETVAL when no
C limbs remain, so it has to survive the tst instructions.
L(le2):	tst	n, #1			C n = {-1,-2,-3} map to [2], [1], [0]
	beq	L(e1)

L(e02):	tst	n, #2
	beq	L(rt0)			C no limbs left
C Two limbs left.
	ldm	vp, {r8, r9}
	adcs	r8, r8, r8
	adcs	r9, r9, r9
	ldm	up, {r4, r5}
	SAVECY(	r12, r11)		C park final doubling carry
	RESTCY(	r14)			C carry-in for the add/subtract
	ADDSUBC	r4, r4, r8
	ADDSUBC	r5, r5, r9
	stm	rp, {r4, r5}
	b	L(rt1)

C One limb left.
L(e1):	ldr	r8, [vp]
	adcs	r8, r8, r8
	ldr	r4, [up]
	SAVECY(	r12, r11)
	RESTCY(	r14)
	ADDSUBC	r4, r4, r8
	str	r4, [rp]

L(rt1):	mov	r14, r12		C final doubling carry to where RETVAL reads it
	REVCY(r12)			C sub: not-borrow -> borrow
L(rt0):	RETVAL(	r14)			C r0 = doubling carry + add/sub carry
	pop	{r4-r10r11, r14}
	ret	r14
EPILOGUE()