dnl  Origin: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/pentium4/aors_n.asm

dnl  x86-64 mpn_add_n/mpn_sub_n optimized for Pentium 4.

dnl  Contributed to the GNU project by Torbjorn Granlund.

dnl  Copyright 2007, 2008, 2010-2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C	     cycles/limb
C AMD K8,K9	 2.8
C AMD K10	 2.8
C Intel P4	 4
C Intel core2	 3.6-5 (fluctuating)
C Intel corei	 ?
C Intel atom	 ?
C VIA nano	 ?


C INPUT PARAMETERS
define(`rp',	`%rdi')
define(`up',	`%rsi')
define(`vp',	`%rdx')
define(`n',	`%rcx')
define(`cy',	`%r8')

ifdef(`OPERATION_add_n', `
	define(ADDSUB,	      add)
	define(func,	      mpn_add_n)
	define(func_nc,	      mpn_add_nc)')
ifdef(`OPERATION_sub_n', `
	define(ADDSUB,	      sub)
	define(func,	      mpn_sub_n)
	define(func_nc,	      mpn_sub_nc)')

C ---------------------------------------------------------------------------
C func     (rp, up, vp, n)      -- add/subtract with carry-in forced to 0
C func_nc  (rp, up, vp, n, cy)  -- add/subtract with caller-supplied carry-in
C
C Syntax:  AT&T operand order, run through m4 with the macros of config.m4.
C
C For each of the n limbs, stores up[i] ADDSUB vp[i] (with carry/borrow
C propagated from the previous limb) to rp[i].  The carry/borrow out of the
C last limb is returned in %rax.
C
C Assumptions not checked by the code:
C   * n >= 1; a zero count is not special-cased (the first v limb is loaded
C     unconditionally and the n = 0 mod 4 path always handles four limbs).
C   * cy is 0 or 1; only its low 32 bits are copied and it is applied as a
C     plain addend/subtrahend to the first limb.
C
C Register use inside the body:
C   %r9                     v limb, loaded one step ahead of its use
C   %r8 %r10 %r11 %r12      u limbs / partial results, one per loop phase
C   %rax %rbx               alternate between holding the carry saved from
C                           the u,v operation and the carry being applied
C   %rbx and %r12 are pushed on entry and popped before returning.
C
C Carry handling: each limb takes two ADDSUB steps (u with v, then the
C previous carry).  The first carry is captured with setc.  If the second
C step also carries, the out-of-line stubs L(c0)..L(c3) set the saved carry
C to 1 and jump back; a plain overwrite is enough because both steps cannot
C carry for the same limb.  Pointer updates between an ADDSUB and its setc
C use lea, which leaves the flags untouched, so their placement matters.
C ---------------------------------------------------------------------------

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
ASM_START()
	TEXT
PROLOGUE(func)
	FUNC_ENTRY(4)
	xor	%r8, %r8		C carry-in = 0
C On DOS64 builds jump past the second entry sequence.  Otherwise no jump is
C emitted and execution continues into func_nc below (this presumably relies
C on EPILOGUE/PROLOGUE/FUNC_ENTRY emitting no instructions for STD64 -- they
C are defined outside this file, confirm in config.m4).
IFDOS(`	jmp	L(ent)		')
EPILOGUE()
PROLOGUE(func_nc)
	FUNC_ENTRY(4)
C DOS64 only: fetch the carry-in from the stack (presumably the fifth
C argument slot after the saves done by FUNC_ENTRY -- confirm in config.m4).
IFDOS(`	mov	56(%rsp), %r8	')
L(ent):	push	%rbx
	push	%r12

	mov	(vp), %r9		C first v limb, needed by every path

C Dispatch on n mod 4.  Each path performs the first u,v operation, places
C the carry-in and the saved carry in the registers expected by the loop
C phase it joins, and biases rp/up/vp so that the fixed offsets used in the
C loop address the right limbs.
	mov	R32(n), R32(%rax)
	and	$3, R32(%rax)
	jne	L(n00)		C n = 0, 4, 8, ...
	mov	R32(%r8), R32(%rbx)	C carry-in, applied at L(L00)
	mov	(up), %r8
	mov	8(up), %r10
	ADDSUB	%r9, %r8
	mov	8(vp), %r9
	setc	R8(%rax)		C rax is 0 here, so this yields 0 or 1
	lea	-16(rp), rp
	jmp	L(L00)

L(n00):	cmp	$2, R32(%rax)
	jnc	L(n01)		C n = 1, 5, 9, ...
	mov	(up), %r11
	mov	R32(%r8), R32(%rax)	C carry-in, applied below or at L(L01)
	xor	R32(%rbx), R32(%rbx)
	dec	n
	jnz	L(gt1)
C n = 1: single limb, finished here without entering the loop.
	ADDSUB	%r9, %r11
	setc	R8(%rbx)
	ADDSUB	%rax, %r11
	adc	$0, R32(%rbx)		C merge the carry of the carry-in step
	mov	%r11, (rp)
	jmp	L(ret)
L(gt1):	mov	8(up), %r8
	ADDSUB	%r9, %r11
	mov	8(vp), %r9
	setc	R8(%rbx)
	lea	-8(rp), rp
	lea	8(up), up
	lea	8(vp), vp
	jmp	L(L01)

C The flags tested by the jne below are still those of the cmp at L(n00):
C only a jnc lies in between.
L(n01):	jne	L(n10)		C n = 2, 6, 10, ...
	mov	(up), %r12
	mov	R32(%r8), R32(%rbx)	C carry-in, applied at L(L10)
	mov	8(up), %r11
	ADDSUB	%r9, %r12
	mov	8(vp), %r9
	setc	R8(%rax)		C upper bits of rax are 0 (it held n mod 4)
	lea	-32(rp), rp
	lea	16(up), up
	lea	16(vp), vp
	jmp	L(L10)

L(n10):	mov	(up), %r10	C n = 3, 7, 11, ...
	mov	R32(%r8), R32(%rax)	C carry-in, applied at L(L11)
	xor	R32(%rbx), R32(%rbx)
	mov	8(up), %r12
	ADDSUB	%r9, %r10
	mov	8(vp), %r9
	setc	R8(%rbx)
	lea	-24(rp), rp
	lea	-8(up), up
	lea	-8(vp), vp
	jmp	L(L11)

C Out-of-line ripple handlers: applying the previous carry produced a carry
C itself, so force the carry saved for the next limb to 1 and resume.
L(c0):	mov	$1, R8(%rbx)
	jmp	L(rc0)
L(c1):	mov	$1, R8(%rax)
	jmp	L(rc1)
L(c2):	mov	$1, R8(%rbx)
	jmp	L(rc2)
L(c3):	mov	$1, R8(%rax)
	jmp	L(rc3)

C Main loop: four limbs per iteration, entered at L(L01), L(L00), L(L11) or
C L(L10) depending on n mod 4.
	ALIGN(16)
L(top):	mov	(up), %r8		C not on critical path
	ADDSUB	%r9, %r11		C not on critical path
	mov	(vp), %r9		C not on critical path
	setc	R8(%rbx)		C save carry out
	mov	%r12, (rp)
L(L01):	ADDSUB	%rax, %r11		C apply previous carry out
	jc	L(c0)			C jump if ripple
L(rc0):	mov	8(up), %r10
	ADDSUB	%r9, %r8
	mov	8(vp), %r9
	setc	R8(%rax)
	mov	%r11, 8(rp)
L(L00):	ADDSUB	%rbx, %r8
	jc	L(c1)
L(rc1):	mov	16(up), %r12
	ADDSUB	%r9, %r10
	mov	16(vp), %r9
	setc	R8(%rbx)
	mov	%r8, 16(rp)
L(L11):	ADDSUB	%rax, %r10
	jc	L(c2)
L(rc2):	mov	24(up), %r11
	ADDSUB	%r9, %r12
	lea	32(up), up		C lea: keeps the carry for the setc below
	mov	24(vp), %r9
	lea	32(vp), vp
	setc	R8(%rax)
	mov	%r10, 24(rp)
L(L10):	ADDSUB	%rbx, %r12
	jc	L(c3)
L(rc3):	lea	32(rp), rp
	sub	$4, n
	ja	L(top)

C Wind-down: the last limb is already in r11 with its v limb in r9; the
C second-to-last result is still pending in r12.
L(end):	ADDSUB	%r9, %r11
	setc	R8(%rbx)
	mov	%r12, (rp)
	ADDSUB	%rax, %r11
	jnc	L(1)
	mov	$1, R8(%rbx)
L(1):	mov	%r11, 8(rp)

L(ret):	mov	R32(%rbx), R32(%rax)	C return carry/borrow out
	pop	%r12
	pop	%rbx
	FUNC_EXIT()
	ret
EPILOGUE()