dnl  IA-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,
dnl  mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.

dnl  Contributed to the GNU project by Torbjorn Granlund.

dnl  Copyright 2003-2005 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C               cycles/limb
C Itanium:          2
C Itanium 2:        1

C TODO
C  * Use rp,rpx scheme of aors_n.asm to allow parallel stores (useful in
C    wind-down code).
C INPUT PARAMETERS
define(`rp', `r32')
define(`up', `r33')
define(`vp', `r34')
define(`n', `r35')

C Per-operation configuration.  One OPERATION_xxx symbol is expected to be
C defined by the build (TODO confirm: nothing in this file checks that exactly
C one is set).  Each branch below defines:
C
C   func             the symbol name handed to PROLOGUE
C   logop(d, a, b)   the two-input instruction, writing d from a and b
C   notormov(d, s)   the second stage: either a plain copy "mov d = s", or
C                    "sub d = -1, s", which computes -1 - s, that is, the
C                    bitwise complement of s
C
C With andcm d = a, b meaning a AND (NOT b), and logop always invoked as
C logop(d, ulimb, vlimb), the combined result per limb is:
C
C   and_n    u & v          nand_n   ~(u & v)
C   andn_n   u & ~v         iorn_n   ~(v & ~u)  =  u | ~v
C   ior_n    u | v          nior_n   ~(u | v)
C   xor_n    u ^ v          xnor_n   ~(u ^ v)

ifdef(`OPERATION_and_n',
`	define(`func',`mpn_and_n')
	define(`logop',		`and	$1 = $2, $3')
	define(`notormov',	`mov	$1 = $2')')
ifdef(`OPERATION_andn_n',
`	define(`func',`mpn_andn_n')
	define(`logop',		`andcm	$1 = $2, $3')
	define(`notormov',	`mov	$1 = $2')')
ifdef(`OPERATION_nand_n',
`	define(`func',`mpn_nand_n')
	define(`logop',		`and	$1 = $2, $3')
	define(`notormov',	`sub	$1 = -1, $2')')
ifdef(`OPERATION_ior_n',
`	define(`func',`mpn_ior_n')
	define(`logop',		`or	$1 = $2, $3')
	define(`notormov',	`mov	$1 = $2')')
ifdef(`OPERATION_iorn_n',
`	define(`func',`mpn_iorn_n')
	define(`logop',		`andcm	$1 = $3, $2')
	define(`notormov',	`sub	$1 = -1, $2')')
ifdef(`OPERATION_nior_n',
`	define(`func',`mpn_nior_n')
	define(`logop',		`or	$1 = $2, $3')
	define(`notormov',	`sub	$1 = -1, $2')')
ifdef(`OPERATION_xor_n',
`	define(`func',`mpn_xor_n')
	define(`logop',		`xor	$1 = $2, $3')
	define(`notormov',	`mov	$1 = $2')')
ifdef(`OPERATION_xnor_n',
`	define(`func',`mpn_xnor_n')
	define(`logop',		`xor	$1 = $2, $3')
	define(`notormov',	`sub	$1 = -1, $2')')

MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)

C ---------------------------------------------------------------------------
C func (rp, up, vp, n)
C
C For each of n limbs: load one 8-byte word from up and one from vp, combine
C them with logop, pass the result through notormov, and store 8 bytes at rp.
C All three pointers are post-incremented by 8 on every access.
C
C The first limb pair is loaded before n is examined, so the code assumes
C n >= 1.
C
C Register use:
C   r2          copy of ar.lc taken on entry, written back before returning
C   r10, r11    first limb pair (u, v)
C   r16-r19     u limbs in flight
C   r20-r23     v limbs in flight (r20 pairs with r16, ... r23 with r19)
C   r14, r15    logop results, used alternately
C   r8,  r9     notormov results waiting to be stored, used alternately
C   p6, p7, p8  n mod 4 equals 1, 2, 3 respectively
C   p15         n > 4
C   ar.lc       loop counter consumed by br.cloop
C
C Structure: a 4-way dispatch on n mod 4 (.Lb00/.Lb01/.Lb10/.Lb11) primes the
C pipeline and jumps either into the 4-limbs-per-iteration loop (.Loop,
C .LL01, .LL00, .LL11 are its four entry points) or, for n <= 4, directly
C into the wind-down chain .Lcj6 ... .Lcj1.
C
C The trailing M / I / I0 / B tags on each instruction are kept from the
C original source; presumably they name the execution unit types the
C instruction can issue on.
C ---------------------------------------------------------------------------

ASM_START()
PROLOGUE(func)
	.prologue
	.save	ar.lc, r2
	.body
C With the 32-bit ABI, widen the three pointers (addp4) and zero-extend n
C (zxt4) before anything else.
ifdef(`HAVE_ABI_32',
`	addp4		rp = 0, rp		C			M I
	addp4		up = 0, up		C			M I
	addp4		vp = 0, vp		C			M I
	nop.m		0
	nop.m		0
	zxt4		n = n			C			I
	;;
')
{.mmi
	ld8		r10 = [up], 8		C first u limb		M
	ld8		r11 = [vp], 8		C first v limb		M
	mov.i		r2 = ar.lc		C save loop counter	I0
}
C The three instructions below share one instruction group, so the and and
C the cmp.lt read the incoming n while shr.u replaces it with n / 4.
{.mmi
	and		r14 = 3, n		C r14 = n mod 4		M I
	cmp.lt		p15, p14 = 4, n		C p15 = (4 < n)		M I
	shr.u		n = n, 2		C n = n / 4		I0
	;;
}
{.mmi
	cmp.eq		p6, p0 = 1, r14		C			M I
	cmp.eq		p7, p0 = 2, r14		C			M I
	cmp.eq		p8, p0 = 3, r14		C			M I
}
{.bbb
  (p6)	br.dptk		.Lb01			C			B
  (p7)	br.dptk		.Lb10			C			B
  (p8)	br.dptk		.Lb11			C			B
}

C n mod 4 == 0: reached by falling through when none of p6/p7/p8 is set.
.Lb00:	ld8		r17 = [up], 8		C			M
	ld8		r21 = [vp], 8		C			M
	add		n = -2, n		C			M I
	;;
	ld8		r18 = [up], 8		C			M
	ld8		r22 = [vp], 8		C			M
	;;
	ld8		r19 = [up], 8		C			M
	ld8		r23 = [vp], 8		C			M
  (p15)	br.cond.dpnt	.grt4			C			B

C n <= 4 here (with n >= 1 that is n = 4): all limbs are loaded, drain.
	logop(		r14, r10, r11)		C			M I
	;;
	logop(		r15, r17, r21)		C			M I
	notormov(	r8, r14)		C			M I
	br		.Lcj4			C			B

.grt4:	logop(		r14, r10, r11)		C			M I
	ld8		r16 = [up], 8		C			M
	ld8		r20 = [vp], 8		C			M
	;;
	logop(		r15, r17, r21)		C			M I
	ld8		r17 = [up], 8		C			M
	mov.i		ar.lc = n		C			I0
	notormov(	r8, r14)		C			M I
	ld8		r21 = [vp], 8		C			M
	br		.LL00			C			B

C n mod 4 == 1
.Lb01:	add		n = -1, n		C			M I
	logop(		r15, r10, r11)		C			M I
  (p15)	br.cond.dpnt	.grt1			C			B
	;;

C n <= 4 here (so n = 1): only the first limb exists.
	notormov(	r9, r15)		C			M I
	br		.Lcj1			C			B

.grt1:	ld8		r16 = [up], 8		C			M
	ld8		r20 = [vp], 8		C			M
	;;
	ld8		r17 = [up], 8		C			M
	ld8		r21 = [vp], 8		C			M
	mov.i		ar.lc = n		C			I0
	;;
	ld8		r18 = [up], 8		C			M
	ld8		r22 = [vp], 8		C			M
	;;
	ld8		r19 = [up], 8		C			M
	ld8		r23 = [vp], 8		C			M
	br.cloop.dptk	.grt5			C			B
	;;

C br.cloop fell through: no loop iteration needed, drain five limbs.
	logop(		r14, r16, r20)		C			M I
	notormov(	r9, r15)		C			M I
	br		.Lcj5			C			B

.grt5:	logop(		r14, r16, r20)		C			M I
	ld8		r16 = [up], 8		C			M
	notormov(	r9, r15)		C			M I
	ld8		r20 = [vp], 8		C			M
	br		.LL01			C			B

C n mod 4 == 2
.Lb10:	ld8		r19 = [up], 8		C			M
	ld8		r23 = [vp], 8		C			M
  (p15)	br.cond.dpnt	.grt2			C			B

C n <= 4 here (so n = 2).
	logop(		r14, r10, r11)		C			M I
	;;
	logop(		r15, r19, r23)		C			M I
	notormov(	r8, r14)		C			M I
	br		.Lcj2			C			B

.grt2:	ld8		r16 = [up], 8		C			M
	ld8		r20 = [vp], 8		C			M
	add		n = -1, n		C			M I
	;;
	ld8		r17 = [up], 8		C			M
	ld8		r21 = [vp], 8		C			M
	logop(		r14, r10, r11)		C			M I
	;;
	ld8		r18 = [up], 8		C			M
	ld8		r22 = [vp], 8		C			M
	mov.i		ar.lc = n		C			I0
	;;
	logop(		r15, r19, r23)		C			M I
	ld8		r19 = [up], 8		C			M
	notormov(	r8, r14)		C			M I
	ld8		r23 = [vp], 8		C			M
	br.cloop.dptk	.Loop			C			B
	br		.Lcj6			C			B

C n mod 4 == 3
.Lb11:	ld8		r18 = [up], 8		C			M
	ld8		r22 = [vp], 8		C			M
	add		n = -1, n		C			M I
	;;
	ld8		r19 = [up], 8		C			M
	ld8		r23 = [vp], 8		C			M
	logop(		r15, r10, r11)		C			M I
  (p15)	br.cond.dpnt	.grt3			C			B
	;;

C n <= 4 here (so n = 3).
	logop(		r14, r18, r22)		C			M I
	notormov(	r9, r15)		C			M I
	br		.Lcj3			C			B

.grt3:	ld8		r16 = [up], 8		C			M
	ld8		r20 = [vp], 8		C			M
	;;
	ld8		r17 = [up], 8		C			M
	ld8		r21 = [vp], 8		C			M
	mov.i		ar.lc = n		C			I0
	;;
	logop(		r14, r18, r22)		C			M I
	ld8		r18 = [up], 8		C			M
	notormov(	r9, r15)		C			M I
	ld8		r22 = [vp], 8		C			M
	br		.LL11			C			B

C Each of the four stages below stores one finished limb, runs logop on a
C limb pair loaded earlier, runs notormov on the previous logop result, and
C reloads the register pair it has just consumed.  One pass over all four
C stages therefore stores four limbs.

C *** MAIN LOOP START ***
	ALIGN(32)
.Loop:	st8		[rp] = r8, 8		C			M
	logop(		r14, r16, r20)		C			M I
	notormov(	r9, r15)		C			M I
	ld8		r16 = [up], 8		C			M
	ld8		r20 = [vp], 8		C			M
	nop.b		0
	;;
.LL01:	st8		[rp] = r9, 8		C			M
	logop(		r15, r17, r21)		C			M I
	notormov(	r8, r14)		C			M I
	ld8		r17 = [up], 8		C			M
	ld8		r21 = [vp], 8		C			M
	nop.b		0
	;;
.LL00:	st8		[rp] = r8, 8		C			M
	logop(		r14, r18, r22)		C			M I
	notormov(	r9, r15)		C			M I
	ld8		r18 = [up], 8		C			M
	ld8		r22 = [vp], 8		C			M
	nop.b		0
	;;
.LL11:	st8		[rp] = r9, 8		C			M
	logop(		r15, r19, r23)		C			M I
	notormov(	r8, r14)		C			M I
	ld8		r19 = [up], 8		C			M
	ld8		r23 = [vp], 8		C			M
	br.cloop.dptk	.Loop ;;		C			B
C *** MAIN LOOP END ***

C Wind-down: same stages as the loop but with no further loads.  Entering at
C .LcjK performs exactly K more stores before returning.
.Lcj6:	st8		[rp] = r8, 8		C			M
	logop(		r14, r16, r20)		C			M I
	notormov(	r9, r15)		C			M I
	;;
.Lcj5:	st8		[rp] = r9, 8		C			M
	logop(		r15, r17, r21)		C			M I
	notormov(	r8, r14)		C			M I
	;;
.Lcj4:	st8		[rp] = r8, 8		C			M
	logop(		r14, r18, r22)		C			M I
	notormov(	r9, r15)		C			M I
	;;
.Lcj3:	st8		[rp] = r9, 8		C			M
	logop(		r15, r19, r23)		C			M I
	notormov(	r8, r14)		C			M I
	;;
.Lcj2:	st8		[rp] = r8, 8		C			M
	notormov(	r9, r15)		C			M I
	;;
.Lcj1:	st8		[rp] = r9, 8		C			M
	mov.i		ar.lc = r2		C restore loop counter	I0
	br.ret.sptk.many b0			C			B
EPILOGUE()
ASM_END()