github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/ia64/sec_tabselect.asm (about) 1 dnl IA-64 mpn_sec_tabselect. 2 3 dnl Copyright 2011 Free Software Foundation, Inc. 4 5 dnl This file is part of the GNU MP Library. 6 dnl 7 dnl The GNU MP Library is free software; you can redistribute it and/or modify 8 dnl it under the terms of either: 9 dnl 10 dnl * the GNU Lesser General Public License as published by the Free 11 dnl Software Foundation; either version 3 of the License, or (at your 12 dnl option) any later version. 13 dnl 14 dnl or 15 dnl 16 dnl * the GNU General Public License as published by the Free Software 17 dnl Foundation; either version 2 of the License, or (at your option) any 18 dnl later version. 19 dnl 20 dnl or both in parallel, as here. 21 dnl 22 dnl The GNU MP Library is distributed in the hope that it will be useful, but 23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 25 dnl for more details. 26 dnl 27 dnl You should have received copies of the GNU General Public License and the 28 dnl GNU Lesser General Public License along with the GNU MP Library. If not, 29 dnl see https://www.gnu.org/licenses/. 30 31 include(`../config.m4') 32 33 C cycles/limb 34 C Itanium: ? 35 C Itanium 2: 2.5 36 37 C NOTES 38 C * Using software pipelining could trivially yield 2 c/l without unrolling, 39 C or 1+epsilon with unrolling. (This code was modelled after the powerpc64 40 C code, for simplicity.) 

C mpn_sec_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n, mp_size_t nents, mp_size_t which)
C
C Purpose:
C   Walk all nents table entries stored back to back at tp, each n limbs
C   long, and for every limb compute
C	rp[i] = (tp[i] AND mask) OR (rp[i] AND NOT mask)
C   where mask is all ones for the entry whose index equals which and zero
C   for every other entry.  After the last entry rp holds entry number which.
C   Every entry is read and rp is rewritten once per entry; no branch in this
C   routine is predicated on which (only on n and on the entry countdown).
C
C Inputs (IA-64 stacked input registers):
C   r32 rp	destination, n limbs
C   r33 tp	table base, nents * n limbs
C   r34 n	limbs per entry; the code assumes n >= 1
C   r35 nents	number of entries; the code assumes nents >= 1
C   r36 which	index of the entry to select
C
C Register use:
C   r2		holds the caller value of ar.lc for the whole routine
C   r6		inner loop count minus one, (n-2)/2, loaded into ar.lc
C   r8		mask
C   r9		n AND 1
C   r14, r15	rp2, tp2: second pointer pair, one limb ahead of rp1, tp1
C   r16-r19	limb temporaries
C   p6, p7	entry is / is not the selected one
C   p8		n is even, skip the single odd limb step
C   p9		more entries remain
C   p10		n == 1, skip the two limb loop entirely
C
C ar.lc is a preserved register, so it is saved in r2 on entry and restored
C before returning.

define(`rp',	`r32')
define(`tp',	`r33')
define(`n',	`r34')
define(`nents',	`r35')
define(`which',	`r36')

define(`mask',	`r8')

define(`rp1',	`r32')
define(`tp1',	`r33')
define(`rp2',	`r14')
define(`tp2',	`r15')

ASM_START()
PROLOGUE(mpn_sec_tabselect)
	.prologue
	.save	ar.lc, r2
	mov.i	r2 = ar.lc		C save caller ar.lc, matches .save above
	.body
ifdef(`HAVE_ABI_32',`
{.mmi;	addp4	rp = 0, rp		C M I
	addp4	tp = 0, tp		C M I
	zxt4	n = n			C I
}{.mii;	nop	0
	zxt4	nents = nents		C I
	zxt4	which = which		C I
	;;
}')
{.mmi;	add	rp2 = 8, rp1		C rp2 = rp + 1 limb
	add	tp2 = 8, tp1		C tp2 = tp + 1 limb
	add	r6 = -2, n
	;;
}{.mmi;	cmp.eq	p10, p0 = 1, n		C p10: single limb entries
	and	r9 = 1, n		C low bit of n, selects the loop entry point
	shr.u	r6 = r6, 1		C inner loop count
	;;
}{.mmi;	cmp.eq	p8, p0 = 0, r9		C p8: n is even
	sub	which = nents, which	C selected entry is hit when the nents countdown equals this
	shl	n = n, 3		C n in bytes, used to rewind rp
	;;
}
L(outer):
{.mmi;	cmp.eq	p6, p7 = which, nents	C are we at the selected table entry?
	nop	0
	mov	ar.lc = r6		C I0
	;;
}{.mmb;
 (p6)	mov	mask = -1
 (p7)	mov	mask = 0
 (p8)	br.dptk	L(top)			C branch to loop entry if n even
	;;
}{.mmi;	ld8	r16 = [tp1], 8		C odd n: handle one limb first
	add	tp2 = 8, tp2
	nop	0
	;;
}{.mmi;	ld8	r18 = [rp1]
	and	r16 = r16, mask
	nop	0
	;;
}{.mmi;	andcm	r18 = r18, mask		C r18 = rp limb AND NOT mask
	;;
	or	r16 = r16, r18
	nop	0
	;;
}{.mmb;	st8	[rp1] = r16, 8
	add	rp2 = 8, rp2
 (p10)	br.dpnt	L(end)			C n == 1: nothing left for the two limb loop
}
	ALIGN(32)
L(top):
{.mmi;	ld8	r16 = [tp1], 16		C two limbs per iteration
	ld8	r17 = [tp2], 16
	nop	0
	;;
}{.mmi;	ld8	r18 = [rp1]
	and	r16 = r16, mask
	nop	0
}{.mmi;	ld8	r19 = [rp2]
	and	r17 = r17, mask
	nop	0
	;;
}{.mmi;	andcm	r18 = r18, mask
	andcm	r19 = r19, mask
	nop	0
	;;
}{.mmi;	or	r16 = r16, r18
	or	r17 = r17, r19
	nop	0
	;;
}{.mmb;	st8	[rp1] = r16, 16
	st8	[rp2] = r17, 16
	br.cloop.dptk	L(top)
	;;
}
L(end):
{.mmi;	sub	rp1 = rp1, n		C move rp back to beginning
	sub	rp2 = rp2, n		C move rp back to beginning
	cmp.ne	p9, p0 = 1, nents	C p9: more entries remain
}{.mmb;	add	nents = -1, nents
	nop	0
 (p9)	br.dptk	L(outer)
	;;
}{.mib;	nop	0
	mov.i	ar.lc = r2		C restore caller ar.lc
	br.ret.sptk.many b0
}
EPILOGUE()