dnl  Listing origin: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/aors_n.asm

dnl  AMD64 mpn_add_n, mpn_sub_n

dnl  Copyright 2003-2005, 2007, 2008, 2010-2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C             cycles/limb
C AMD K8,K9      1.5
C AMD K10        1.5
C AMD bd1        1.8
C AMD bobcat     2.5
C Intel P4
C Intel core2    4.9
C Intel NHM      5.5
C Intel SBR      1.61
C Intel IBR      1.61
C Intel atom     4
C VIA nano       3.25

C The loop of this code is the result of running a code generation and
C optimization tool suite written by David Harvey and Torbjorn Granlund.

C INPUT PARAMETERS
C Each name below is the STD64 argument register; the trailing comment gives
C the DOS64 location of the same argument.
define(`rp', `%rdi')	C rcx
define(`up', `%rsi')	C rdx
define(`vp', `%rdx')	C r8
define(`n', `%rcx')	C r9
define(`cy', `%r8')	C rsp+40    (mpn_add_nc and mpn_sub_nc)

C Select the add or the subtract flavour.  ADCSBB is the only instruction
C that differs between the two builds.
ifdef(`OPERATION_add_n', `
	define(ADCSBB,	      adc)
	define(func,	      mpn_add_n)
	define(func_nc,	      mpn_add_nc)')
ifdef(`OPERATION_sub_n', `
	define(ADCSBB,	      sbb)
	define(func,	      mpn_sub_n)
	define(func_nc,	      mpn_sub_nc)')

MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

C OVERVIEW
C   func    (rp, up, vp, n)      rp[i] = up[i] ADCSBB vp[i], carry-in 0
C   func_nc (rp, up, vp, n, cy)  same, carry-in taken from bit 0 of cy
C Both walk n 64-bit limbs from low to high address and return the final
C carry or borrow, 0 or 1, in eax.
C
C REGISTER USE
C   eax       n mod 4, the limbs left over after the 4-limb groups; reused
C             for the return value
C   n (rcx)   n / 4, the number of 4-limb groups
C   r8-r11    the four limbs currently being combined
C   CF        the running carry or borrow
C
C CF has to survive from one ADCSBB chain to the next, so the code between
C chains only uses instructions that leave CF alone: mov, lea, inc, dec,
C jrcxz and jnz.  That is why counters are stepped with dec, pointers with
C lea, and why eax is tested with an inc/dec pair instead of test or cmp.
C Do not reorder or substitute instructions without rechecking the flags.
C
C n = 0 is not handled: eax would wrap below zero at L(lt4) and three limbs
C would be processed, so callers must pass n >= 1.
C
C FUNC_ENTRY, FUNC_EXIT and IFDOS are not defined in this file; presumably
C they map the DOS64 arguments onto the registers above.  TODO confirm in
C the included m4 definitions.

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(func_nc)
	FUNC_ENTRY(4)
C DOS64 only: the fifth argument is loaded from the stack into r8.
IFDOS(`	mov	56(%rsp), %r8	')
	mov	R32(n), R32(%rax)
	shr	$2, n			C n = number of 4-limb groups
	and	$3, R32(%rax)		C eax = leftover limbs, 0..3
C The bt has to come after shr and and, which both overwrite CF.
	bt	$0, %r8			C cy flag <- carry parameter
	jrcxz	L(lt4)			C no full group; jrcxz leaves CF alone

	mov	(up), %r8
	mov	8(up), %r9
	dec	n			C sets ZF for the jnz at L(mid), keeps CF
	jmp	L(mid)

EPILOGUE()
	ALIGN(16)
PROLOGUE(func)
	FUNC_ENTRY(4)
	mov	R32(n), R32(%rax)
	shr	$2, n			C n = number of 4-limb groups
	and	$3, R32(%rax)		C eax = leftover limbs; and leaves CF = 0
	jrcxz	L(lt4)			C no full group

	mov	(up), %r8
	mov	8(up), %r9
	dec	n			C sets ZF for the jnz at L(mid), keeps CF
	jmp	L(mid)

C Tail: eax = limbs still to do, CF = carry-in.  Reached straight from the
C entry points when there is no full 4-limb group, and from L(end) when
C n mod 4 is nonzero.  Each dec steps eax down and picks the 1-, 2- or
C 3-limb case without disturbing CF.
L(lt4):	dec	R32(%rax)
	mov	(up), %r8
	jnz	L(2)
	ADCSBB	(vp), %r8
	mov	%r8, (rp)
	adc	R32(%rax), R32(%rax)	C eax is 0 here, so eax = CF
	FUNC_EXIT()
	ret

L(2):	dec	R32(%rax)
	mov	8(up), %r9
	jnz	L(3)
	ADCSBB	(vp), %r8
	ADCSBB	8(vp), %r9
	mov	%r8, (rp)
	mov	%r9, 8(rp)
	adc	R32(%rax), R32(%rax)	C eax is 0 here, so eax = CF
	FUNC_EXIT()
	ret

C eax is 1 on arrival here, so adc eax,eax would return 2 + CF.  setc
C overwrites the low byte instead; the upper bytes of eax are already zero.
L(3):	mov	16(up), %r10
	ADCSBB	(vp), %r8
	ADCSBB	8(vp), %r9
	ADCSBB	16(vp), %r10
	mov	%r8, (rp)
	mov	%r9, 8(rp)
	mov	%r10, 16(rp)
	setc	R8(%rax)
	FUNC_EXIT()
	ret

C Main loop, four limbs per pass.  The loads for the next pass are issued
C at the bottom of the current one.  L(mid) is the entry point, with r8 and
C r9 already loaded by the entry code.  The jnz tests ZF from the preceding
C dec n, because the mov and lea instructions in between do not write flags.
	ALIGN(16)
L(top):	ADCSBB	(vp), %r8
	ADCSBB	8(vp), %r9
	ADCSBB	16(vp), %r10
	ADCSBB	24(vp), %r11
	mov	%r8, (rp)
	lea	32(up), up
	mov	%r9, 8(rp)
	mov	%r10, 16(rp)
	dec	n
	mov	%r11, 24(rp)
	lea	32(vp), vp
	mov	(up), %r8
	mov	8(up), %r9
	lea	32(rp), rp
L(mid):	mov	16(up), %r10
	mov	24(up), %r11
	jnz	L(top)

C Last 4-limb group: r8-r11 are loaded and nothing further is fetched.  The
C three pointers are still advanced so that L(lt4) can continue from them.
L(end):	lea	32(up), up
	ADCSBB	(vp), %r8
	ADCSBB	8(vp), %r9
	ADCSBB	16(vp), %r10
	ADCSBB	24(vp), %r11
	lea	32(vp), vp
	mov	%r8, (rp)
	mov	%r9, 8(rp)
	mov	%r10, 16(rp)
	mov	%r11, 24(rp)
	lea	32(rp), rp

	inc	R32(%rax)
	dec	R32(%rax)		C ZF = (eax == 0); CF and eax unchanged
	jnz	L(lt4)			C 1..3 limbs remain
	adc	R32(%rax), R32(%rax)	C eax is 0 here, so eax = CF
	FUNC_EXIT()
	ret
EPILOGUE()