github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/dive_1.asm (about)

     1  dnl  x86 mpn_divexact_1 -- mpn by limb exact division.
     2  
     3  dnl  Copyright 2001, 2002, 2007 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  
    34  C     cycles/limb
    35  C P54    30.0
    36  C P55    29.0
    37  C P6     13.0 odd divisor, 12.0 even (strangely)
    38  C K6     14.0
    39  C K7     12.0
    40  C P4     42.0
    41  
    42  
    43  C mp_limb_t mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
    44  C                           mp_limb_t divisor);
    45  C
    46  
    47  defframe(PARAM_DIVISOR,16)	C 4th argument in the prototype: divisor
    48  defframe(PARAM_SIZE,   12)	C 3rd argument in the prototype: size
    49  defframe(PARAM_SRC,    8)	C 2nd argument in the prototype: src
    50  defframe(PARAM_DST,    4)	C 1st argument in the prototype: dst
    51  dnl  NOTE(review): the numbers look like byte offsets of the arguments from the stack pointer at entry -- confirm against the defframe definition, which is not in this file
    52  dnl  re-use parameter space: VAR_INVERSE names the same slot as PARAM_SRC, so the code must read src before it stores the inverse there
    53  define(VAR_INVERSE,`PARAM_SRC')
    54  
    55  	TEXT
    56  	C Exact division of the size-limb operand at src by divisor, quotient limbs stored at dst.
    57  	ALIGN(16)
    58  PROLOGUE(mpn_divexact_1)
    59  deflit(`FRAME',0)
    60  	C NOTE(review): FRAME_pushl and FRAME_popl presumably keep the PARAM_ offsets in step with each push and pop -- confirm in the m4 support files.
    61  	movl	PARAM_DIVISOR, %eax	C eax = divisor
    62  	pushl	%ebp	FRAME_pushl()
    63  
    64  	movl	PARAM_SIZE, %ebp	C ebp = size in limbs
    65  	pushl	%edi	FRAME_pushl()
    66  
    67  	pushl	%ebx	FRAME_pushl()
    68  	movl	$-1, %ecx		C shift count, becomes 0 on the first incl
    69  
    70  	pushl	%esi	FRAME_pushl()
    71  	C Count and strip the trailing zero bits of the divisor.  A zero divisor never shifts out a 1 bit, so this loop would not terminate.
    72  L(strip_twos):
    73  	incl	%ecx
    74  
    75  	shrl	%eax			C CF = bit shifted out
    76  	jnc	L(strip_twos)		C repeat until a 1 bit has been shifted out
    77  
    78  	leal	1(%eax,%eax), %ebx	C d without twos, the shifted-out 1 bit put back
    79  	andl	$127, %eax		C d/2, 7 bits, used as the table index
    80  
    81  ifdef(`PIC',`
    82  	LEA(	binvert_limb_table, %edx)
    83  	movzbl	(%eax,%edx), %eax		C inv 8 bits
    84  ',`
    85  	movzbl	binvert_limb_table(%eax), %eax	C inv 8 bits
    86  ')
    87  	C Two refinement steps inv = 2*inv - inv*inv*d follow, interleaved with the pointer setup.
    88  	leal	(%eax,%eax), %edx	C 2*inv
    89  	movl	%ebx, PARAM_DIVISOR	C d without twos, overwrites the divisor argument slot
    90  
    91  	imull	%eax, %eax		C inv*inv
    92  
    93  	movl	PARAM_SRC, %esi		C read src now, its slot is reused as VAR_INVERSE below
    94  	movl	PARAM_DST, %edi
    95  
    96  	imull	%ebx, %eax		C inv*inv*d
    97  
    98  	subl	%eax, %edx		C inv = 2*inv - inv*inv*d
    99  	leal	(%edx,%edx), %eax	C 2*inv
   100  
   101  	imull	%edx, %edx		C inv*inv
   102  
   103  	leal	(%esi,%ebp,4), %esi	C src end
   104  	leal	(%edi,%ebp,4), %edi	C dst end
   105  	negl	%ebp			C -size, so (end,%ebp,4) addresses limb 0
   106  
   107  	imull	%ebx, %edx		C inv*inv*d
   108  
   109  	subl	%edx, %eax		C inv = 2*inv - inv*inv*d
   110  
   111  	ASSERT(e,`	C expect d*inv == 1 mod 2^GMP_LIMB_BITS
   112  	pushl	%eax	FRAME_pushl()
   113  	imull	PARAM_DIVISOR, %eax
   114  	cmpl	$1, %eax
   115  	popl	%eax	FRAME_popl()')
   116  
   117  	movl	%eax, VAR_INVERSE	C keep the inverse in the former src slot
   118  	movl	(%esi,%ebp,4), %eax	C src[0]
   119  
   120  	xorl	%ebx, %ebx		C carry bit starts at 0
   121  	xorl	%edx, %edx		C carry limb starts at 0
   122  
   123  	incl	%ebp
   124  	jz	L(one)			C size == 1, src[0] is also the high limb
   125  
   126  	movl	(%esi,%ebp,4), %edx	C src[1]
   127  
   128  	shrdl(	%cl, %edx, %eax)	C eax = src[0] shifted right, top bits filled from src[1]
   129  
   130  	movl	VAR_INVERSE, %edx
   131  	jmp	L(entry)		C first limb has no carry to apply, join the loop at the multiply
   132  
   133  
   134  	ALIGN(8)
   135  	nop	C k6 code alignment
   136  	nop
   137  L(top):
   138  	C eax	q
   139  	C ebx	carry bit, 0 or -1
   140  	C ecx	shift
   141  	C edx	carry limb
   142  	C esi	src end
   143  	C edi	dst end
   144  	C ebp	counter, limbs, negative
   145  
   146  	movl	-4(%esi,%ebp,4), %eax	C limb being divided this pass
   147  	subl	%ebx, %edx		C accumulate carry bit, subtracting -1 adds one to the carry limb
   148  
   149  	movl	(%esi,%ebp,4), %ebx	C next higher limb, supplies the bits shifted in
   150  
   151  	shrdl(	%cl, %ebx, %eax)	C eax = current limb of src with the twos shifted out
   152  
   153  	subl	%edx, %eax		C apply carry limb
   154  	movl	VAR_INVERSE, %edx	C movl leaves CF from the subl intact for the sbbl
   155  
   156  	sbbl	%ebx, %ebx		C ebx = 0 or -1 according to the borrow from the subl above
   157  
   158  L(entry):
   159  	imull	%edx, %eax		C q = limb * inverse, low 32 bits
   160  
   161  	movl	%eax, -4(%edi,%ebp,4)	C store quotient limb
   162  	movl	PARAM_DIVISOR, %edx	C d without twos
   163  
   164  	mull	%edx			C edx = high limb of q*d, the carry limb for the next pass
   165  
   166  	incl	%ebp
   167  	jnz	L(top)
   168  
   169  	C High limb: there is no further limb to shift bits in from.
   170  	movl	-4(%esi), %eax		C src high limb
   171  L(one):
   172  	shrl	%cl, %eax
   173  	popl	%esi	FRAME_popl()
   174  
   175  	addl	%ebx, %eax		C apply carry bit, adding 0 or -1
   176  	popl	%ebx	FRAME_popl()
   177  
   178  	subl	%edx, %eax		C apply carry limb
   179  
   180  	imull	VAR_INVERSE, %eax	C last quotient limb
   181  
   182  	movl	%eax, -4(%edi)		C store at dst[size-1]
   183  
   184  	popl	%edi			C no PARAM_ or VAR_ references follow these last two pops
   185  	popl	%ebp
   186  
   187  	ret
   188  
   189  EPILOGUE()
   190  ASM_END()