github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/arm/sec_tabselect.asm (about) 1 dnl ARM mpn_sec_tabselect 2 3 dnl Contributed to the GNU project by Torbjörn Granlund. 4 5 dnl Copyright 2013 Free Software Foundation, Inc. 6 7 dnl This file is part of the GNU MP Library. 8 dnl 9 dnl The GNU MP Library is free software; you can redistribute it and/or modify 10 dnl it under the terms of either: 11 dnl 12 dnl * the GNU Lesser General Public License as published by the Free 13 dnl Software Foundation; either version 3 of the License, or (at your 14 dnl option) any later version. 15 dnl 16 dnl or 17 dnl 18 dnl * the GNU General Public License as published by the Free Software 19 dnl Foundation; either version 2 of the License, or (at your option) any 20 dnl later version. 21 dnl 22 dnl or both in parallel, as here. 23 dnl 24 dnl The GNU MP Library is distributed in the hope that it will be useful, but 25 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 26 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 27 dnl for more details. 28 dnl 29 dnl You should have received copies of the GNU General Public License and the 30 dnl GNU Lesser General Public License along with the GNU MP Library. If not, 31 dnl see https://www.gnu.org/licenses/. 32 33 include(`../config.m4') 34 35 C cycles/limb 36 C StrongARM ? 37 C XScale ? 38 C Cortex-A7 ? 39 C Cortex-A8 ? 40 C Cortex-A9 2.33 41 C Cortex-A15 2.2 42 43 C TODO 44 C * Consider using special code for small nents, either swapping the inner and 45 C outer loops, or providing a few variants that completely unroll the inner loops.
define(`rp',     `r0')
define(`tp',     `r1')
define(`n',      `r2')
define(`nents',  `r3')
C      which  on stack

define(`i',      `r11')
define(`j',      `r12')
define(`c',      `r14')
define(`mask',   `r7')

C mpn_sec_tabselect (rp, tp, n, nents, which)
C
C Stores n limbs at rp, taken from the table at tp.  The table is walked as
C entries spaced n limbs (n*4 bytes) apart, and entry number which, counting
C from 0, is the one that ends up at rp.
C
C Every visited entry is loaded, ANDed with a mask that is all ones for the
C wanted entry and zero otherwise, and ORed into accumulators.  The addresses
C loaded and the branches taken therefore depend only on n and nents, never
C on which.
C
C In:     r0 = rp, r1 = tp, r2 = n, r3 = nents, fifth argument which on the
C         stack.  After the entry push of nine registers it sits at
C         [sp, #36].
C Regs:   i    (r11)     entries left in the current pass over the table
C         j    (r12)     limbs left to produce, biased by -3
C         c    (r14)     countdown that reaches 0 at entry number which
C         mask (r7)      0 or all ones
C         r4-r6          limbs loaded from the current entry
C         r8-r10         accumulators for the selected limbs
C Saves:  r4-r11 and r14 are pushed on entry and popped before returning.
C
C The limbs are produced three at a time by the outer loop; the 0, 1 or 2
C leftover limbs are handled by the tail code.  All inner loops test at the
C bottom, so at least one entry is always read.
C NOTE(review): presumably callers guarantee nents >= 1 and which < nents.

ASM_START()
PROLOGUE(mpn_sec_tabselect)
	push	{r4-r11, r14}

	subs	j, n, #3
	bmi	L(outer_end)		C fewer than 3 limbs, go to the tail code

C Outer loop: one pass over the whole table per group of 3 result limbs.
L(outer_top):
	ldr	c, [sp, #36]		C reload which, before tp is pushed
	mov	i, nents
	push	{tp}			C keep the start of this limb column

	mov	r8, #0
	mov	r9, #0
	mov	r10, #0

L(top):	subs	c, c, #1		C borrows only when c was 0
	ldm	tp, {r4,r5,r6}
	sbc	mask, mask, mask	C carry - 1, old mask value is irrelevant
	subs	i, i, #1
	add	tp, tp, n, lsl #2	C step to the same limbs of the next entry
	and	r4, r4, mask
	and	r5, r5, mask
	and	r6, r6, mask
	orr	r8, r8, r4
	orr	r9, r9, r5
	orr	r10, r10, r6
	bgt	L(top)			C exactly nents passes; bge would make one
					C more and load beyond the last entry

	stmia	rp!, {r8,r9,r10}
	pop	{tp}
	add	tp, tp, #12		C next group of 3 limbs in entry 0
	subs	j, j, #3
	bpl	L(outer_top)
L(outer_end):

C Here j is -1, -2 or -3, meaning 2, 1 or 0 limbs remain.
	cmp	j, #-1
	bne	L(n2)

C Two limbs remain.
	ldr	c, [sp, #36]
	mov	i, nents
	mov	r8, #0
	mov	r9, #0
L(tp2):	subs	c, c, #1
	sbc	mask, mask, mask
	ldm	tp, {r4,r5}
	subs	i, i, #1
	add	tp, tp, n, lsl #2
	and	r4, r4, mask
	and	r5, r5, mask
	orr	r8, r8, r4
	orr	r9, r9, r5
	bgt	L(tp2)			C exactly nents passes, as in the main loop
	stmia	rp, {r8,r9}
	pop	{r4-r11, r14}
	ret	lr

L(n2):	cmp	j, #-2
	bne	L(n1)

C One limb remains.
	ldr	c, [sp, #36]
	mov	i, nents
	mov	r8, #0
L(tp1):	subs	c, c, #1
	sbc	mask, mask, mask
	ldr	r4, [tp]
	subs	i, i, #1
	add	tp, tp, n, lsl #2
	and	r4, r4, mask
	orr	r8, r8, r4
	bgt	L(tp1)			C exactly nents passes, as in the main loop
	str	r8, [rp]
L(n1):	pop	{r4-r11, r14}
	ret	lr
EPILOGUE()