dnl  Source path: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/pentium4/sse2/bdiv_dbm1c.asm

dnl  Intel Atom mpn_bdiv_dbm1c.

dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.

dnl  Copyright 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                            cycles/limb
C                            cycles/limb
C P5                             -
C P6 model 0-8,10-12             -
C P6 model 9  (Banias)           9.75
C P6 model 13 (Dothan)
C P4 model 0  (Willamette)
C P4 model 1  (?)
C P4 model 2  (Northwood)        8.25
C P4 model 3  (Prescott)
C P4 model 4  (Nocona)
C Intel Atom                     8
C AMD K6                         -
C AMD K7                         -
C AMD K8
C AMD K10

C TODO: This code was optimised for atom-32, consider moving it back to atom
C       dir (atom currently grabs this code), and write a 4-way version (7c/l).
C mpn_bdiv_dbm1c -- 32-bit x86 code using the MMX registers and pmuludq.
C
C Stack arguments, named by the defframe lines below.  NOTE(review): the
C numbers are presumably byte offsets from the stack pointer at entry;
C defframe itself comes from config.m4, confirm there.
C	PARAM_DST	destination limb pointer, loaded into rp
C	PARAM_SRC	source limb pointer, loaded into up
C	PARAM_SIZE	limb count, loaded into n
C	PARAM_MUL	32-bit multiplier, loaded into mm7
C	PARAM_CARRY	initial value of the running word cy
C
C For each source limb, low to high, with p = limb * multiplier (64 bits):
C	cy -= low 32 bits of p, then cy is stored to the destination
C	cy -= high 32 bits of p, minus the borrow of the first subtraction
C The final cy is left in eax as the return value.
C
C The registers behind up and rp (esi and edi) are saved in the PARAM_SIZE and
C PARAM_MUL slots, each only after that argument has been read, and are
C reloaded before returning.
C
C The borrow travels in CF across lea, mov, dec and the MMX instructions, none
C of which modify CF, so the instruction order below must not be disturbed.
C
C The size is assumed to be at least 1: a size of 0 takes the same path as a
C size of 1 and still reads and writes one limb.

defframe(PARAM_CARRY,20)
defframe(PARAM_MUL,  16)
defframe(PARAM_SIZE, 12)
defframe(PARAM_SRC,  8)
defframe(PARAM_DST,  4)

dnl  re-use parameter space
define(SAVE_RP,`PARAM_MUL')
define(SAVE_UP,`PARAM_SIZE')

define(`rp', `%edi')
define(`up', `%esi')
define(`n',  `%ecx')
define(`reg', `%edx')
define(`cy', `%eax')	C contains the return value

ASM_START()
	TEXT
	ALIGN(16)
deflit(`FRAME',0)

PROLOGUE(mpn_bdiv_dbm1c)
	mov	PARAM_SIZE, n		C size
	mov	up, SAVE_UP		C slot is free now that size is in n
	mov	PARAM_SRC, up
	movd	PARAM_MUL, %mm7		C mm7 = multiplier
	mov	rp, SAVE_RP		C slot is free now that mul is in mm7
	mov	PARAM_DST, rp

	movd	(up), %mm0
	pmuludq	%mm7, %mm0		C mm0 = 64-bit product of limb 0
	shr	n			C halve the count: CF = old low bit, ZF = result is 0
	mov	PARAM_CARRY, cy		C mov keeps the flags from shr
	jz	L(eq1)			C one limb only

	movd	4(up), %mm1
	jc	L(odd)			C odd size: enter the 2-limb loop mid-way

C Even size: peel the low half of limb 0 here so that the loop and the tail
C together consume the remaining limbs.
	lea	4(up), up
	pmuludq	%mm7, %mm1
	movd	%mm0, reg		C low half of product
	psrlq	$32, %mm0
	sub	reg, cy
	movd	%mm0, reg		C high half, subtracted by the next sbb
	movq	%mm1, %mm0
	dec	n			C sets ZF for the jz, CF from sub survives
	mov	cy, (rp)
	lea	4(rp), rp
	jz	L(end)

C	ALIGN(16)
L(top):	movd	4(up), %mm1
	sbb	reg, cy			C cy -= high half + pending borrow
L(odd):	movd	%mm0, reg
	psrlq	$32, %mm0
	pmuludq	%mm7, %mm1
	sub	reg, cy
	lea	8(up), up
	movd	%mm0, reg
	movd	(up), %mm0
	mov	cy, (rp)
	sbb	reg, cy
	movd	%mm1, reg
	psrlq	$32, %mm1
	sub	reg, cy
	movd	%mm1, reg
	pmuludq	%mm7, %mm0
	dec	n			C loop count, CF kept for the sbb at top or end
	mov	cy, 4(rp)
	lea	8(rp), rp
	jnz	L(top)

L(end):	sbb	reg, cy			C finish the limb whose high half is pending

C Last limb, its product is already in mm0.
L(eq1):	movd	%mm0, reg
	psrlq	$32, %mm0
	mov	SAVE_UP, up		C reload the saved esi
	sub	reg, cy
	movd	%mm0, reg
	emms				C done with the MMX registers
	mov	cy, (rp)
	sbb	reg, cy			C final cy is the return value

	mov	SAVE_RP, rp		C reload the saved edi
	ret
EPILOGUE()
ASM_END()