github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/bdiv_q_1.asm (about)

     1  dnl  x86 mpn_bdiv_q_1 -- mpn by limb exact division.
     2  
     3  dnl  Rearranged from mpn/x86/dive_1.asm by Marco Bodrato.
     4  
     5  dnl  Copyright 2001, 2002, 2007, 2011 Free Software Foundation, Inc.
     6  
     7  dnl  This file is part of the GNU MP Library.
     8  dnl
     9  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
    10  dnl  it under the terms of either:
    11  dnl
    12  dnl    * the GNU Lesser General Public License as published by the Free
    13  dnl      Software Foundation; either version 3 of the License, or (at your
    14  dnl      option) any later version.
    15  dnl
    16  dnl  or
    17  dnl
    18  dnl    * the GNU General Public License as published by the Free Software
    19  dnl      Foundation; either version 2 of the License, or (at your option) any
    20  dnl      later version.
    21  dnl
    22  dnl  or both in parallel, as here.
    23  dnl
    24  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    25  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    26  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    27  dnl  for more details.
    28  dnl
    29  dnl  You should have received copies of the GNU General Public License and the
    30  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    31  dnl  see https://www.gnu.org/licenses/.
    32  
    33  include(`../config.m4')
    34  
    35  
    36  C     cycles/limb
    37  C P54    30.0
    38  C P55    29.0
    39  C P6     13.0 odd divisor, 12.0 even (strangely)
    40  C K6     14.0
    41  C K7     12.0
    42  C P4     42.0
    43  
    44  MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
    45  C Argument slots: each name below is used as a memory operand for one C argument
    46  defframe(PARAM_SHIFT,  24)
    47  defframe(PARAM_INVERSE,20)
    48  defframe(PARAM_DIVISOR,16)
    49  defframe(PARAM_SIZE,   12)
    50  defframe(PARAM_SRC,    8)
    51  defframe(PARAM_DST,    4)
    52  C The numbers rise by 4 in prototype order (dst first); shift and inverse exist only for mpn_pi1_bdiv_q_1
    53  dnl  re-use parameter space
    54  define(VAR_INVERSE,`PARAM_SRC')
    55  C VAR_INVERSE is written only after PARAM_SRC has been loaded into esi
    56  	TEXT
    57  
    58  C mp_limb_t
    59  C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor,
    60  C		    mp_limb_t inverse, int shift)
    61  C Stores size limbs at dst; each is (src limb shifted right by shift, minus carries) * inverse
    62  	ALIGN(16)
    63  PROLOGUE(mpn_pi1_bdiv_q_1)
    64  deflit(`FRAME',0)
    65  C ebp and ebx are saved before L(common), edi and esi after it
    66  	movl	PARAM_SHIFT, %ecx	C ecx = shift count, used as cl throughout
    67  	pushl	%ebp	FRAME_pushl()
    68  
    69  	movl	PARAM_INVERSE, %eax	C eax = inverse, parked in VAR_INVERSE below
    70  	movl	PARAM_SIZE, %ebp
    71  	pushl	%ebx	FRAME_pushl()
    72  L(common):	C mpn_bdiv_q_1 joins here with eax=inverse, ecx=shift, ebp=size, ebp/ebx pushed
    73  	pushl	%edi	FRAME_pushl()
    74  	pushl	%esi	FRAME_pushl()
    75  
    76  	movl	PARAM_SRC, %esi
    77  	movl	PARAM_DST, %edi
    78  
    79  	leal	(%esi,%ebp,4), %esi	C src end
    80  	leal	(%edi,%ebp,4), %edi	C dst end
    81  	negl	%ebp			C -size
    82  	C from here limbs are addressed as end + 4*ebp with ebp counting up to zero
    83  	movl	%eax, VAR_INVERSE	C src pointer is in esi now so its stack slot is free
    84  	movl	(%esi,%ebp,4), %eax	C src[0]
    85  
    86  	xorl	%ebx, %ebx		C carry bit = 0
    87  	xorl	%edx, %edx		C carry limb = 0
    88  
    89  	incl	%ebp
    90  	jz	L(one)			C size == 1: only the final-limb code runs
    91  
    92  	movl	(%esi,%ebp,4), %edx	C src[1]
    93  
    94  	shrdl(	%cl, %edx, %eax)	C eax = src[0] >> shift with low bits of src[1] shifted in
    95  
    96  	movl	VAR_INVERSE, %edx	C edx = inverse, as the imull at L(entry) expects
    97  	jmp	L(entry)
    98  
    99  
   100  	ALIGN(8)
   101  	nop	C k6 code alignment
   102  	nop
   103  L(top):
   104  	C eax	q
   105  	C ebx	carry bit, 0 or -1
   106  	C ecx	shift
   107  	C edx	carry limb
   108  	C esi	src end
   109  	C edi	dst end
   110  	C ebp	counter, limbs, negative
   111  
   112  	movl	-4(%esi,%ebp,4), %eax	C current src limb
   113  	subl	%ebx, %edx		C accumulate carry bit (ebx is 0 or -1, so this adds 0 or 1)
   114  
   115  	movl	(%esi,%ebp,4), %ebx	C next src limb, supplies the bits shifted in
   116  
   117  	shrdl(	%cl, %ebx, %eax)
   118  
   119  	subl	%edx, %eax		C apply carry limb
   120  	movl	VAR_INVERSE, %edx	C movl does not alter flags, so the borrow survives for sbbl
   121  
   122  	sbbl	%ebx, %ebx		C ebx = 0 or -1 from the borrow of the subl above
   123  
   124  L(entry):
   125  	imull	%edx, %eax		C quotient limb = eax * inverse
   126  
   127  	movl	%eax, -4(%edi,%ebp,4)	C store quotient limb
   128  	movl	PARAM_DIVISOR, %edx
   129  
   130  	mull	%edx			C edx:eax = q * divisor; high half in edx is the next carry limb
   131  
   132  	incl	%ebp
   133  	jnz	L(top)
   134  
   135  
   136  	movl	-4(%esi), %eax		C src high limb
   137  L(one):
   138  	shrl	%cl, %eax		C plain shift: no higher limb to shift in
   139  	popl	%esi	FRAME_popl()	C NOTE(review): FRAME_popl presumably keeps the VAR_INVERSE offset below in step - confirm
   140  
   141  	addl	%ebx, %eax		C apply carry bit
   142  
   143  	subl	%edx, %eax		C apply carry limb
   144  
   145  	imull	VAR_INVERSE, %eax
   146  
   147  	movl	%eax, -4(%edi)		C last quotient limb, also still in eax at ret
   148  
   149  	popl	%edi
   150  	popl	%ebx
   151  	popl	%ebp
   152  
   153  	ret
   154  
   155  EPILOGUE()
   156  
   157  C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
   158  C                           mp_limb_t divisor);
   159  C Counts the low zero bits of divisor as the shift, inverts the remaining odd part,
   160  C then jumps into mpn_pi1_bdiv_q_1 at L(common) to do the division.
   161  	ALIGN(16)
   162  PROLOGUE(mpn_bdiv_q_1)
   163  deflit(`FRAME',0)
   164  
   165  	movl	PARAM_DIVISOR, %eax
   166  	pushl	%ebp	FRAME_pushl()
   167  
   168  	movl	$-1, %ecx		C shift count
   169  	movl	PARAM_SIZE, %ebp	C ebp = size, as L(common) expects
   170  
   171  	pushl	%ebx	FRAME_pushl()
   172  
   173  L(strip_twos):	C loop ends when a set bit is shifted out, so a zero divisor would never exit
   174  	incl	%ecx
   175  
   176  	shrl	%eax
   177  	jnc	L(strip_twos)
   178  
   179  	leal	1(%eax,%eax), %ebx	C d without twos
   180  	andl	$127, %eax		C d/2, 7 bits
   181  
   182  ifdef(`PIC',`
   183  	LEA(	binvert_limb_table, %edx)
   184  	movzbl	(%eax,%edx), %eax		C inv 8 bits
   185  ',`
   186  	movzbl	binvert_limb_table(%eax), %eax	C inv 8 bits
   187  ')
   188  	C two refinement steps follow; the ASSERT below expects a full-limb inverse afterwards
   189  	leal	(%eax,%eax), %edx	C 2*inv
   190  	movl	%ebx, PARAM_DIVISOR	C d without twos, read back by the loop after L(common)
   191  	imull	%eax, %eax		C inv*inv
   192  	imull	%ebx, %eax		C inv*inv*d
   193  	subl	%eax, %edx		C inv = 2*inv - inv*inv*d
   194  
   195  	leal	(%edx,%edx), %eax	C 2*inv
   196  	imull	%edx, %edx		C inv*inv
   197  	imull	%ebx, %edx		C inv*inv*d
   198  	subl	%edx, %eax		C inv = 2*inv - inv*inv*d
   199  
   200  	ASSERT(e,`	C expect d*inv == 1 mod 2^GMP_LIMB_BITS
   201  	pushl	%eax	FRAME_pushl()
   202  	imull	PARAM_DIVISOR, %eax
   203  	cmpl	$1, %eax
   204  	popl	%eax	FRAME_popl()')
   205  
   206  	jmp	L(common)		C enter shared code with eax=inverse, ecx=shift, ebp=size
   207  EPILOGUE()
   208  ASM_END()