github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/ia64/copyd.asm (about) 1 dnl IA-64 mpn_copyd -- copy limb vector, decrementing. 2 3 dnl Contributed to the GNU project by Torbjorn Granlund. 4 5 dnl Copyright 2001, 2002, 2004 Free Software Foundation, Inc. 6 7 dnl This file is part of the GNU MP Library. 8 dnl 9 dnl The GNU MP Library is free software; you can redistribute it and/or modify 10 dnl it under the terms of either: 11 dnl 12 dnl * the GNU Lesser General Public License as published by the Free 13 dnl Software Foundation; either version 3 of the License, or (at your 14 dnl option) any later version. 15 dnl 16 dnl or 17 dnl 18 dnl * the GNU General Public License as published by the Free Software 19 dnl Foundation; either version 2 of the License, or (at your option) any 20 dnl later version. 21 dnl 22 dnl or both in parallel, as here. 23 dnl 24 dnl The GNU MP Library is distributed in the hope that it will be useful, but 25 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 26 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 27 dnl for more details. 28 dnl 29 dnl You should have received copies of the GNU General Public License and the 30 dnl GNU Lesser General Public License along with the GNU MP Library. If not, 31 dnl see https://www.gnu.org/licenses/. 
include(`../config.m4')

C         cycles/limb
C Itanium:    1
C Itanium 2:  0.5

C INPUT PARAMETERS
C rp = r32
C sp = r33
C n  = r34

C mpn_copyd copies n limbs from sp to rp.  Every limb is moved with ld8/st8,
C and the copy starts at the highest-addressed limb and works downwards.
C
C Register roles, as used below:
C   r32, r20   destination pointers, each stepped by -16 after every store
C   r33, r21   source pointers, each stepped by -16 after every load
C              (the r32/r33 pair and the r20/r21 pair handle alternate limbs)
C   r16..r19   limbs that have been loaded but not yet stored
C   r14        n mod 4, selects one of the four preambles .Lb00 .. .Lb11
C   r15        (n - 4) >> 2, the value written to ar.lc
C   r2         ar.lc as it was on entry, restored before returning
C   p14        true when n <= 3, i.e. the main loop must be skipped
C
C The loop at .Loop moves four limbs per pass.  Each preamble loads the
C leading n mod 4 limbs plus enough further limbs to fill r16..r19, then
C jumps into the loop at the matching .Li* entry point.  The tail starting
C at .Lend stores whatever is still held in r16..r19; the .Ls* labels are
C the short-cut entries for n <= 3.
C
C NOTE(review): n is presumably never negative here -- confirm against the
C callers.  n = 0 takes the .Lb00 path and exits through .Ls00 without
C touching memory.

ASM_START()
PROLOGUE(mpn_copyd)
        .prologue
C Unwind annotation: the entry value of ar.lc is kept in r2.
        .save ar.lc, r2
        .body
C With HAVE_ABI_32 defined, rp and sp go through addp4 and n is sign-extended
C from 32 bits before anything else uses them.
ifdef(`HAVE_ABI_32',
`       addp4           r32 = 0, r32
        addp4           r33 = 0, r33
        sxt4            r34 = r34
        ;;
')
C r32 = rp + 8*n and r33 = sp + 8*n, one limb past the top of each vector.
C ar.lc is saved because the loop below overwrites it.
{.mmi
        shladd          r32 = r34, 3, r32
        shladd          r33 = r34, 3, r33
        mov.i           r2 = ar.lc
}
C r14 = n mod 4, p14 = (n <= 3), r34 = n - 4.
{.mmi
        and             r14 = 3, r34
        cmp.ge          p14, p15 = 3, r34
        add             r34 = -4, r34
        ;;
}
C Dispatch on n mod 4; a remainder of 0 falls through to .Lb00.
{.mmi
        cmp.eq          p8, p0 = 1, r14
        cmp.eq          p10, p0 = 2, r14
        cmp.eq          p12, p0 = 3, r14
}
{.bbb
  (p8)  br.dptk         .Lb01
  (p10) br.dptk         .Lb10
  (p12) br.dptk         .Lb11
}

.Lb00:  C  n = 0, 4, 8, 12, ...
C Point r32/r33 at the top limb.  With n <= 3 there is nothing to copy on
C this path, so leave through .Ls00.
        add             r32 = -8, r32
        add             r33 = -8, r33
  (p14) br.dptk         .Ls00
        ;;
C r21 becomes the second source pointer, one limb below r33.
        add             r21 = -8, r33
        ld8             r16 = [r33], -16
        shr             r15 = r34, 2
        ;;
        ld8             r17 = [r21], -16
        mov.i           ar.lc = r15
        ld8             r18 = [r33], -16
C r20 becomes the second destination pointer, one limb below r32.
        add             r20 = -8, r32
        ;;
        ld8             r19 = [r21], -16
C Enter the loop at its top if ar.lc is non-zero (br.cloop also decrements
C it); otherwise only the four limbs already loaded remain, so go to .Lend.
        br.cloop.dptk   .Loop
        ;;
        br.sptk         .Lend
        ;;

.Lb01:  C  n = 1, 5, 9, 13, ...
C Here r20/r21 take the top limb and r32/r33 the one below it.
        add             r21 = -8, r33
        add             r20 = -8, r32
        add             r33 = -16, r33
        add             r32 = -16, r32
        ;;
        ld8             r19 = [r21], -16
        shr             r15 = r34, 2
C n <= 3 on this path: only r19 is pending, store it at .Ls01.
  (p14) br.dptk         .Ls01
        ;;
        ld8             r16 = [r33], -16
        mov.i           ar.lc = r15
        ;;
        ld8             r17 = [r21], -16
        ld8             r18 = [r33], -16
C Join the loop at its last bundle, which stores r19 first.
        br.sptk         .Li01
        ;;

.Lb10:  C  n = 2, 6, 10, 14, ...
C Here r32/r33 take the top limb and r20/r21 the one below it.
        add             r21 = -16, r33
        shr             r15 = r34, 2
        add             r20 = -16, r32
        add             r32 = -8, r32
        add             r33 = -8, r33
        ;;
        ld8             r18 = [r33], -16
        ld8             r19 = [r21], -16
        mov.i           ar.lc = r15
C n <= 3 on this path: r18 and r19 are pending, store them at .Ls10.
  (p14) br.dptk         .Ls10
        ;;
        ld8             r16 = [r33], -16
        ld8             r17 = [r21], -16
C Join the loop at the bundle that stores r18 first.
        br.sptk         .Li10
        ;;

.Lb11:  C  n = 3, 7, 11, 15, ...
C Here r20/r21 take the top limb and r32/r33 the one below it.
        add             r21 = -8, r33
        add             r20 = -8, r32
        add             r33 = -16, r33
        add             r32 = -16, r32
        ;;
        ld8             r17 = [r21], -16
        shr             r15 = r34, 2
        ;;
        ld8             r18 = [r33], -16
        mov.i           ar.lc = r15
        ld8             r19 = [r21], -16
C n <= 3 on this path: r17, r18 and r19 are pending, store them at .Ls11.
  (p14) br.dptk         .Ls11
        ;;
        ld8             r16 = [r33], -16
C Join the loop at the bundle that stores r17 first.
        br.sptk         .Li11
        ;;

C Main loop.  Each bundle stores one limb loaded earlier and reloads the same
C register with a limb four positions further down.  The .Li* labels are the
C entry points used by the preambles above.
        ALIGN(32)
.Loop:
.Li00:
{.mmb
        st8             [r32] = r16, -16
        ld8             r16 = [r33], -16
        nop.b           0
}
.Li11:
{.mmb
        st8             [r20] = r17, -16
        ld8             r17 = [r21], -16
        nop.b           0
        ;;
}
.Li10:
{.mmb
        st8             [r32] = r18, -16
        ld8             r18 = [r33], -16
        nop.b           0
}
.Li01:
{.mmb
        st8             [r20] = r19, -16
        ld8             r19 = [r21], -16
        br.cloop.dptk   .Loop
        ;;
}
C Tail.  Falling in at .Lend stores all four pending limbs; entering at
C .Ls11, .Ls10 or .Ls01 stores the last three, two or one; .Ls00 stores none.
.Lend:  st8             [r32] = r16, -16
.Ls11:  st8             [r20] = r17, -16
        ;;
.Ls10:  st8             [r32] = r18, -16
.Ls01:  st8             [r20] = r19, -16
C Put back the entry value of ar.lc and return through b0.
.Ls00:  mov.i           ar.lc = r2
        br.ret.sptk.many b0
EPILOGUE()
ASM_END()