github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/mod_34lsub1.asm (about) 1 dnl Generic x86 mpn_mod_34lsub1 -- mpn remainder modulo 2^24-1. 2 3 dnl Copyright 2000-2002, 2004 Free Software Foundation, Inc. 4 5 dnl This file is part of the GNU MP Library. 6 dnl 7 dnl The GNU MP Library is free software; you can redistribute it and/or modify 8 dnl it under the terms of either: 9 dnl 10 dnl * the GNU Lesser General Public License as published by the Free 11 dnl Software Foundation; either version 3 of the License, or (at your 12 dnl option) any later version. 13 dnl 14 dnl or 15 dnl 16 dnl * the GNU General Public License as published by the Free Software 17 dnl Foundation; either version 2 of the License, or (at your option) any 18 dnl later version. 19 dnl 20 dnl or both in parallel, as here. 21 dnl 22 dnl The GNU MP Library is distributed in the hope that it will be useful, but 23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 25 dnl for more details. 26 dnl 27 dnl You should have received copies of the GNU General Public License and the 28 dnl GNU Lesser General Public License along with the GNU MP Library. If not, 29 dnl see https://www.gnu.org/licenses/. 
include(`../config.m4')


C     cycles/limb
C P5     3.0
C P6     3.66
C K6     3.0
C K7     1.3
C P4     9


C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
C
C Returns in eax a value congruent to {src,size} modulo 2^24-1.  The
C result is a plain sum of folded pieces and is not reduced any further
C here.
C
C Method: 2^24 == 1 mod 2^24-1, so a 32-bit limb at index i has weight
C 2^(32*i) == 2^(8*(i mod 3)).  Limbs are summed into three accumulators
C selected by i mod 3, using one continuous adcl carry chain, and each
C accumulator is then split at the bit where its weight wraps to 1:
C
C       0mod3   weight 2^0      split at bit 24
C       1mod3   weight 2^8      split at bit 16
C       2mod3   weight 2^16     split at bit 8
C
C The instruction order matters.  Between adcl instructions only leal,
C movl, incl and decl are used, none of which modify the carry flag.
C
C NOTE(review): size==0 is not distinguished from size==1, so src[0] is
C loaded and returned.  Presumably callers guarantee size >= 1.

defframe(PARAM_SIZE, 8)
defframe(PARAM_SRC,  4)

dnl  re-use parameter space
define(SAVE_EBX, `PARAM_SRC')

        TEXT
        ALIGN(16)
PROLOGUE(mpn_mod_34lsub1)
deflit(`FRAME',0)

        movl    PARAM_SIZE, %ecx
        movl    PARAM_SRC, %edx

        subl    $2, %ecx                C flags: above if size>2, below if size<2
        ja      L(three_or_more)

        movl    (%edx), %eax            C src[0], returned as is when size==1
        jb      L(one)                  C still testing the subl flags

        C size==2, fold src[0] and src[1] directly

        movl    4(%edx), %ecx           C src[1]
        movl    %eax, %edx
        shrl    $24, %eax               C src[0] high, bits 24-31

        andl    $0xFFFFFF, %edx         C src[0] low, bits 0-23
        addl    %edx, %eax
        movl    %ecx, %edx

        andl    $0xFFFF, %ecx
        shrl    $16, %edx               C src[1] high, bits 16-31, weight 1
        addl    %edx, %eax

        shll    $8, %ecx                C src[1] low, bits 0-15, weight 2^8
        addl    %ecx, %eax

L(one):
        ret


L(three_or_more):
        C eax
        C ebx
        C ecx   size-2
        C edx   src
        C esi
        C edi
        C ebp

        movl    %ebx, SAVE_EBX          C and arrange 16-byte loop alignment
        xorl    %ebx, %ebx

        pushl   %esi    FRAME_pushl()
        xorl    %esi, %esi

        pushl   %edi    FRAME_pushl()
        xorl    %eax, %eax              C and clear carry flag


        C offset 0x40 here
L(top):
        C eax   acc 0mod3
        C ebx   acc 1mod3
        C ecx   counter, limbs
        C edx   src
        C esi   acc 2mod3
        C edi
        C ebp
        C
        C Three limbs per iteration.  The carry out of the 2mod3 add has
        C weight 2^96 == 1 and is picked up by the 0mod3 add of the next
        C iteration.

        leal    12(%edx), %edx          C advance src, flags untouched
        leal    -2(%ecx), %ecx          C counter -= 2, flags untouched

        adcl    -12(%edx), %eax
        adcl    -8(%edx), %ebx
        adcl    -4(%edx), %esi

        decl    %ecx                    C counter -= 1, carry flag preserved
        jg      L(top)


        C ecx is -2, -1 or 0 representing 0, 1 or 2 more limbs, respectively
        C
        C edi is set to minus the weight of whichever carry is pending
        C when L(combine) is reached: 1, 2^8 or 2^16.

        movl    $0xFFFFFFFF, %edi       C carry out of 2mod3, weight 1
        incl    %ecx                    C carry flag preserved
        js      L(combine)

        adcl    (%edx), %eax
        movl    $0xFFFFFF00, %edi       C carry out of 0mod3, weight 2^8
        decl    %ecx                    C carry flag preserved
        js      L(combine)

        adcl    4(%edx), %ebx
        movl    $0xFFFF0000, %edi       C carry out of 1mod3, weight 2^16


L(combine):
        C eax   acc 0mod3
        C ebx   acc 1mod3
        C ecx
        C edx
        C esi   acc 2mod3
        C edi   mask
        C ebp

        sbbl    %ecx, %ecx              C carry, -1 if set else 0
        movl    %eax, %edx              C 0mod3

        shrl    $24, %eax               C 0mod3 high
        andl    %edi, %ecx              C carry masked, minus its weight or 0

        subl    %ecx, %eax              C apply carry
        movl    %ebx, %edi              C 1mod3

        shrl    $16, %ebx               C 1mod3 high
        andl    $0x00FFFFFF, %edx       C 0mod3 low

        addl    %edx, %eax              C apply 0mod3 low
        andl    $0xFFFF, %edi

        shll    $8, %edi                C 1mod3 low
        addl    %ebx, %eax              C apply 1mod3 high

        addl    %edi, %eax              C apply 1mod3 low
        movl    %esi, %edx              C 2mod3

        shrl    $8, %esi                C 2mod3 high
        andl    $0xFF, %edx             C 2mod3 low

        shll    $16, %edx               C 2mod3 low
        addl    %esi, %eax              C apply 2mod3 high

        addl    %edx, %eax              C apply 2mod3 low
        popl    %edi    FRAME_popl()

        movl    SAVE_EBX, %ebx
        popl    %esi    FRAME_popl()

        ret

EPILOGUE()