github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/k7/invert_limb.asm (about) 1 dnl x86 mpn_invert_limb 2 3 dnl Contributed to the GNU project by Niels Möller 4 5 dnl Copyright 2009, 2011, 2015 Free Software Foundation, Inc. 6 7 dnl This file is part of the GNU MP Library. 8 dnl 9 dnl The GNU MP Library is free software; you can redistribute it and/or modify 10 dnl it under the terms of either: 11 dnl 12 dnl * the GNU Lesser General Public License as published by the Free 13 dnl Software Foundation; either version 3 of the License, or (at your 14 dnl option) any later version. 15 dnl 16 dnl or 17 dnl 18 dnl * the GNU General Public License as published by the Free Software 19 dnl Foundation; either version 2 of the License, or (at your option) any 20 dnl later version. 21 dnl 22 dnl or both in parallel, as here. 23 dnl 24 dnl The GNU MP Library is distributed in the hope that it will be useful, but 25 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 26 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 27 dnl for more details. 28 dnl 29 dnl You should have received copies of the GNU General Public License and the 30 dnl GNU Lesser General Public License along with the GNU MP Library. If not, 31 dnl see https://www.gnu.org/licenses/. 32 33 include(`../config.m4') 34 35 C cycles (approx) div 36 C P5 ? 37 C P6 model 0-8,10-12 ? 38 C P6 model 9 (Banias) ? 39 C P6 model 13 (Dothan) ? 40 C P4 model 0 (Willamette) ? 41 C P4 model 1 (?) ? 42 C P4 model 2 (Northwood) ? 43 C P4 model 3 (Prescott) ? 44 C P4 model 4 (Nocona) ? 45 C AMD K6 ? 46 C AMD K7 41 53 47 C AMD K8 ? 48 49 C TODO 50 C * These c/l numbers are for a non-PIC build. Consider falling back to using 51 C the 'div' instruction for PIC builds. 52 C * Perhaps use this file--or at least the algorithm--for more machines than k7. 
C Register usage:
C   Input D in %edi
C   Current approximation is in %eax and/or %ecx
C   %ebx and %edx are temporaries
C   %esi and %ebp are unused

C The only argument, the divisor limb d, is fetched through the PARAM_DIVISOR
C frame macro.  The result is returned in %eax.
defframe(PARAM_DIVISOR,4)

ASM_START()

C Make approx_tab global to work around Apple relocation bug.
ifdef(`DARWIN',`
	deflit(`approx_tab', MPN(invert_limb_tab))
	GLOBL	approx_tab')

	TEXT
	ALIGN(16)
C mpn_invert_limb: starting from a table lookup v0, refine the approximation
C in three steps v1, v2, v3 using the formulas given below, and return v3.
C %ebx and %edi are saved on entry and restored before returning; %eax, %ecx,
C %edx and the flags are overwritten.
PROLOGUE(mpn_invert_limb)
deflit(`FRAME', 0)
	mov	PARAM_DIVISOR, %eax
	C Avoid push/pop on k7.
	C Two stack slots are reserved: (%esp) keeps %ebx and 4(%esp) keeps %edi.
	sub	$8, %esp	FRAME_subl_esp(8)
	mov	%ebx, (%esp)
	mov	%edi, 4(%esp)

	C Keep d in %edi for the whole routine and use its top 10 bits as the
	C table index.  Entries are 2 bytes wide and the lookup is biased by
	C -1024 bytes, so index 512 selects the first entry.  With 512 entries in
	C the table, only a divisor with its most significant bit set stays
	C inside it.
	mov	%eax, %edi
	shr	$22, %eax
ifdef(`PIC',`
	LEAL(	approx_tab, %ebx)
	movzwl	-1024(%ebx, %eax, 2), %eax
',`
	movzwl	-1024+approx_tab(%eax, %eax), %eax	C %eax = v0
')

	C v1 = (v0 << 4) - ((v0*v0*d_21) >> 32) - 1
	C Here d_21 = (d >> 11) + 1, and the >> 32 is obtained by taking the high
	C half %edx of the widening mul.
	mov	%eax, %ecx				C %ecx = v0
	imul	%eax, %eax				C %eax = v0 * v0
	mov	%edi, %ebx
	shr	$11, %ebx
	inc	%ebx					C %ebx = d_21
	mul	%ebx					C %edx = (v0*v0*d_21) >> 32
	mov	%edi, %ebx				C Prepare
	C The shift moves the low bit of d into the carry flag.  sbb then turns it
	C into 0 or all ones, and subtracting that value rounds d/2 upwards.
	shr	%ebx
	sbb	%eax, %eax
	sub	%eax, %ebx				C %ebx = d_31, %eax = mask
	shl	$4, %ecx
	dec	%ecx
	sub	%edx, %ecx				C %ecx = v1

	C v_2 = (v1 << 15) + ((v1 *(2^48 - v1 * d31 + (v1 >> 1) & mask)) >> 33)
	C The inner term is formed modulo 2^32, so the 2^48 constant needs no
	C instruction of its own.  The >> 33 is the high half of the widening mul
	C shifted right by one more bit.
	imul	%ecx, %ebx				C %ebx = v1 * d_31, low 32 bits
	and	%ecx, %eax				C %eax = v1 & mask
	shr	%eax
	sub	%ebx, %eax
	mul	%ecx
	mov	%edi, %eax				C Prepare for next mul
	shl	$15, %ecx
	shr	%edx
	add	%edx, %ecx				C %ecx = v2

	C v3 = v2 - (((v2 + 2^32 + 1) * d) >> 32), computed modulo 2^32.
	C The add is executed only for its carry out of the low half; the mov that
	C follows leaves the flags untouched, so adc still sees that carry.
	mul	%ecx					C %edx:%eax = d * v2
	add	%edi, %eax
	mov	%ecx, %eax
	adc	%edi, %edx
	sub	%edx, %eax				C %eax = v3

	C Restore the saved registers and release the two stack slots.
	mov	(%esp), %ebx
	mov	4(%esp), %edi
	add	$8, %esp

	ret

EPILOGUE()

C Table of initial approximations v0: 512 entries of 16 bits each, read by
C the biased lookup at the start of mpn_invert_limb.
C NOTE(review): spot checks suggest that the entry for index i holds
C floor(0xffc200 / i) for i = 512 .. 1023; this was not verified for every
C entry.
DEF_OBJECT(approx_tab,2)
	.value	0x7fe1,0x7fa1,0x7f61,0x7f22,0x7ee3,0x7ea4,0x7e65,0x7e27
	.value	0x7de9,0x7dab,0x7d6d,0x7d30,0x7cf3,0x7cb6,0x7c79,0x7c3d
	.value	0x7c00,0x7bc4,0x7b89,0x7b4d,0x7b12,0x7ad7,0x7a9c,0x7a61
	.value	0x7a27,0x79ec,0x79b2,0x7979,0x793f,0x7906,0x78cc,0x7894
	.value	0x785b,0x7822,0x77ea,0x77b2,0x777a,0x7742,0x770b,0x76d3
	.value	0x769c,0x7665,0x762f,0x75f8,0x75c2,0x758c,0x7556,0x7520
	.value	0x74ea,0x74b5,0x7480,0x744b,0x7416,0x73e2,0x73ad,0x7379
	.value	0x7345,0x7311,0x72dd,0x72aa,0x7277,0x7243,0x7210,0x71de
	.value	0x71ab,0x7179,0x7146,0x7114,0x70e2,0x70b1,0x707f,0x704e
	.value	0x701c,0x6feb,0x6fba,0x6f8a,0x6f59,0x6f29,0x6ef9,0x6ec8
	.value	0x6e99,0x6e69,0x6e39,0x6e0a,0x6ddb,0x6dab,0x6d7d,0x6d4e
	.value	0x6d1f,0x6cf1,0x6cc2,0x6c94,0x6c66,0x6c38,0x6c0a,0x6bdd
	.value	0x6bb0,0x6b82,0x6b55,0x6b28,0x6afb,0x6acf,0x6aa2,0x6a76
	.value	0x6a49,0x6a1d,0x69f1,0x69c6,0x699a,0x696e,0x6943,0x6918
	.value	0x68ed,0x68c2,0x6897,0x686c,0x6842,0x6817,0x67ed,0x67c3
	.value	0x6799,0x676f,0x6745,0x671b,0x66f2,0x66c8,0x669f,0x6676
	.value	0x664d,0x6624,0x65fc,0x65d3,0x65aa,0x6582,0x655a,0x6532
	.value	0x650a,0x64e2,0x64ba,0x6493,0x646b,0x6444,0x641c,0x63f5
	.value	0x63ce,0x63a7,0x6381,0x635a,0x6333,0x630d,0x62e7,0x62c1
	.value	0x629a,0x6275,0x624f,0x6229,0x6203,0x61de,0x61b8,0x6193
	.value	0x616e,0x6149,0x6124,0x60ff,0x60da,0x60b6,0x6091,0x606d
	.value	0x6049,0x6024,0x6000,0x5fdc,0x5fb8,0x5f95,0x5f71,0x5f4d
	.value	0x5f2a,0x5f07,0x5ee3,0x5ec0,0x5e9d,0x5e7a,0x5e57,0x5e35
	.value	0x5e12,0x5def,0x5dcd,0x5dab,0x5d88,0x5d66,0x5d44,0x5d22
	.value	0x5d00,0x5cde,0x5cbd,0x5c9b,0x5c7a,0x5c58,0x5c37,0x5c16
	.value	0x5bf5,0x5bd4,0x5bb3,0x5b92,0x5b71,0x5b51,0x5b30,0x5b10
	.value	0x5aef,0x5acf,0x5aaf,0x5a8f,0x5a6f,0x5a4f,0x5a2f,0x5a0f
	.value	0x59ef,0x59d0,0x59b0,0x5991,0x5972,0x5952,0x5933,0x5914
	.value	0x58f5,0x58d6,0x58b7,0x5899,0x587a,0x585b,0x583d,0x581f
	.value	0x5800,0x57e2,0x57c4,0x57a6,0x5788,0x576a,0x574c,0x572e
	.value	0x5711,0x56f3,0x56d5,0x56b8,0x569b,0x567d,0x5660,0x5643
	.value	0x5626,0x5609,0x55ec,0x55cf,0x55b2,0x5596,0x5579,0x555d
	.value	0x5540,0x5524,0x5507,0x54eb,0x54cf,0x54b3,0x5497,0x547b
	.value	0x545f,0x5443,0x5428,0x540c,0x53f0,0x53d5,0x53b9,0x539e
	.value	0x5383,0x5368,0x534c,0x5331,0x5316,0x52fb,0x52e0,0x52c6
	.value	0x52ab,0x5290,0x5276,0x525b,0x5240,0x5226,0x520c,0x51f1
	.value	0x51d7,0x51bd,0x51a3,0x5189,0x516f,0x5155,0x513b,0x5121
	.value	0x5108,0x50ee,0x50d5,0x50bb,0x50a2,0x5088,0x506f,0x5056
	.value	0x503c,0x5023,0x500a,0x4ff1,0x4fd8,0x4fbf,0x4fa6,0x4f8e
	.value	0x4f75,0x4f5c,0x4f44,0x4f2b,0x4f13,0x4efa,0x4ee2,0x4eca
	.value	0x4eb1,0x4e99,0x4e81,0x4e69,0x4e51,0x4e39,0x4e21,0x4e09
	.value	0x4df1,0x4dda,0x4dc2,0x4daa,0x4d93,0x4d7b,0x4d64,0x4d4d
	.value	0x4d35,0x4d1e,0x4d07,0x4cf0,0x4cd8,0x4cc1,0x4caa,0x4c93
	.value	0x4c7d,0x4c66,0x4c4f,0x4c38,0x4c21,0x4c0b,0x4bf4,0x4bde
	.value	0x4bc7,0x4bb1,0x4b9a,0x4b84,0x4b6e,0x4b58,0x4b41,0x4b2b
	.value	0x4b15,0x4aff,0x4ae9,0x4ad3,0x4abd,0x4aa8,0x4a92,0x4a7c
	.value	0x4a66,0x4a51,0x4a3b,0x4a26,0x4a10,0x49fb,0x49e5,0x49d0
	.value	0x49bb,0x49a6,0x4990,0x497b,0x4966,0x4951,0x493c,0x4927
	.value	0x4912,0x48fe,0x48e9,0x48d4,0x48bf,0x48ab,0x4896,0x4881
	.value	0x486d,0x4858,0x4844,0x482f,0x481b,0x4807,0x47f3,0x47de
	.value	0x47ca,0x47b6,0x47a2,0x478e,0x477a,0x4766,0x4752,0x473e
	.value	0x472a,0x4717,0x4703,0x46ef,0x46db,0x46c8,0x46b4,0x46a1
	.value	0x468d,0x467a,0x4666,0x4653,0x4640,0x462c,0x4619,0x4606
	.value	0x45f3,0x45e0,0x45cd,0x45ba,0x45a7,0x4594,0x4581,0x456e
	.value	0x455b,0x4548,0x4536,0x4523,0x4510,0x44fe,0x44eb,0x44d8
	.value	0x44c6,0x44b3,0x44a1,0x448f,0x447c,0x446a,0x4458,0x4445
	.value	0x4433,0x4421,0x440f,0x43fd,0x43eb,0x43d9,0x43c7,0x43b5
	.value	0x43a3,0x4391,0x437f,0x436d,0x435c,0x434a,0x4338,0x4327
	.value	0x4315,0x4303,0x42f2,0x42e0,0x42cf,0x42bd,0x42ac,0x429b
	.value	0x4289,0x4278,0x4267,0x4256,0x4244,0x4233,0x4222,0x4211
	.value	0x4200,0x41ef,0x41de,0x41cd,0x41bc,0x41ab,0x419a,0x418a
	.value	0x4179,0x4168,0x4157,0x4147,0x4136,0x4125,0x4115,0x4104
	.value	0x40f4,0x40e3,0x40d3,0x40c2,0x40b2,0x40a2,0x4091,0x4081
	.value	0x4071,0x4061,0x4050,0x4040,0x4030,0x4020,0x4010,0x4000
END_OBJECT(approx_tab)
ASM_END()