github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/divrem_2.asm (about)

     1  dnl  x86 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
     2  
     3  dnl  Copyright 2007, 2008 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  
    34  C		norm	frac
    35  C 486
    36  C P5
    37  C P6-13		29.2
    38  C P6-15		*26
    39  C K6
    40  C K7		22
    41  C K8		*19
    42  C P4-f1
    43  C P4-f2		*65
    44  C P4-f3
    45  C P4-f4		*72
    46  
    47  C A star means numbers not updated for the latest version of the code.
    48  
    49  
    50  C TODO
    51  C  * Perhaps keep ecx or esi in stack slot, freeing up a reg for q0.
    52  C  * The loop has not been carefully tuned.  We should at the very least do
    53  C    some local insn swapping.
    54  C  * The code outside the main loop is what gcc generated.  Clean up!
    55  C  * Clean up stack slot usage.
    56  
    57  C INPUT PARAMETERS
    58  C qp
    59  C fn
    60  C up_param
    61  C un_param
    62  C dp
    63  
    64  
    65  C eax ebx ecx edx esi edi ebp
    66  C         cnt         q     (qp itself stays in its stack slot)
    67  
    68  ASM_START()
    69  	TEXT
    70  	ALIGN(16)
    71  PROLOGUE(mpn_divrem_2)
    72  	push	%ebp			C save ebp, edi, esi, ebx; all four are popped again before ret
    73  	push	%edi
    74  	push	%esi
    75  	push	%ebx
    76  	sub	$36, %esp		C local stack slots; after this qp=56, fn=60, up=64, un=68, dp=72 (%esp)
    77  	mov	68(%esp), %ecx		C un
    78  	mov	72(%esp), %esi		C dp
    79  	movl	$0, 32(%esp)		C msl (the value returned in eax) = 0
    80  	lea	0(,%ecx,4), %edi	C edi = 4*un
    81  	add	64(%esp), %edi		C edi = up + 4*un
    82  	mov	(%esi), %ebx		C ebx = d0 = dp[0]
    83  	mov	4(%esi), %eax		C eax = d1 = dp[1]
    84  	mov	%ebx, 20(%esp)		C slot 20 = d0
    85  	sub	$12, %edi		C edi = address of up[un-3]
    86  	mov	%eax, 24(%esp)		C slot 24 = d1
    87  	mov	%edi, 12(%esp)		C slot 12 = that address, reloaded as the loop read pointer
    88  	mov	8(%edi), %ebx		C n2 = up[un-1]
    89  	mov	4(%edi), %ebp		C n1 = up[un-2]
    90  	cmp	%eax, %ebx		C n2 below d1: top two limbs are already less than the divisor
    91  	jb	L(8)
    92  	seta	%dl			C dl = (n2 > d1)
    93  	cmp	20(%esp), %ebp
    94  	setae	%al			C al = (n1 >= d0)
    95  	orb	%dl, %al		C "orb" form to placate Sun tools
    96  	jne	L(35)			C top two limbs >= divisor: subtract it once and set msl = 1
    97  L(8):
    98  	mov	60(%esp), %esi		C fn
    99  	lea	-3(%esi,%ecx), %edi	C edi = fn + un - 3, index of the first quotient limb to store
   100  	test	%edi, %edi
   101  	js	L(9)			C negative index: no quotient limbs, just store the remainder
   102  	mov	24(%esp), %edx		C d1
   103  	mov	$-1, %esi
   104  	mov	%esi, %eax		C eax = all ones (low half of the dividend for divl)
   105  	mov	%esi, %ecx		C ecx = -1, collects the carries of the two add/adc pairs below
   106  	not	%edx			C edx = ~d1 (high half of the dividend for divl)
   107  	divl	24(%esp)		C eax = quotient of (~d1 * 2^32 + 2^32 - 1) by d1
   108  	mov	%eax, %esi		C esi = di, candidate inverse from d1 alone
   109  	imul	24(%esp), %eax		C eax = low limb of di * d1
   110  	mov	%eax, (%esp)		C park it in slot 0 across the mull
   111  	mov	%esi, %eax
   112  	mull	20(%esp)		C edx:eax = di * d0
   113  	mov	(%esp), %eax
   114  	add	20(%esp), %eax		C eax = low(di * d1) + d0
   115  	adc	$0, %ecx
   116  	add	%eax, %edx		C edx = high(di * d0) + that sum
   117  	adc	$0, %ecx
   118  	mov	%ecx, %eax		C mov leaves the flags from the adc untouched
   119  	js	L(32)			C ecx still negative (no carry seen): di needs no adjustment
   120  L(36):	dec	%esi			C di--
   121  	sub	24(%esp), %edx		C take d1 off the two-limb value eax:edx
   122  	sbb	$0, %eax
   123  	jns	L(36)			C repeat while the high word in eax is non-negative
   124  L(32):
   125  	mov	%esi, 16(%esp)		C di
   126  	mov	%edi, %ecx		C un
   127  	mov	12(%esp), %esi		C up
   128  	mov	24(%esp), %eax
   129  	neg	%eax
   130  	mov	%eax, 4(%esp)		C -d1
   131  	ALIGN(16)
   132  	nop
   133  
   134  C eax ebx ecx edx esi edi ebp  0    4   8   12  16  20  24  28  32   56  60
   135  C     n2  un      up      n1   q0  -d1          di  d0  d1      msl  qp  fn
   136  
   137  L(loop):
   138  	mov	16(%esp), %eax		C di
   139  	mul	%ebx			C edx:eax = di * n2
   140  	add	%ebp, %eax		C low half += n1
   141  	mov	%eax, (%esp)		C q0
   142  	adc	%ebx, %edx		C high half += n2 + carry
   143  	mov	%edx, %edi		C q
   144  	imul	4(%esp), %edx		C edx = low limb of q * (-d1)
   145  	mov	20(%esp), %eax		C d0
   146  	lea	(%edx, %ebp), %ebx	C n1 -= ...
   147  	mul	%edi			C edx:eax = d0 * q
   148  	xor	%ebp, %ebp		C next low limb defaults to 0
   149  	cmp	60(%esp), %ecx		C index below fn (signed compare): keep the 0, do not read from up
   150  	jl	L(19)
   151  	mov	(%esi), %ebp		C otherwise fetch the next limb from up
   152  	sub	$4, %esi		C and step the read pointer down one limb
   153  L(19):	sub	20(%esp), %ebp		C ebx:ebp -= d1:d0
   154  	sbb	24(%esp), %ebx
   155  	sub	%eax, %ebp		C ebx:ebp -= d0 * q
   156  	sbb	%edx, %ebx
   157  	mov	20(%esp), %eax		C d0
   158  	inc	%edi			C q++ (inc leaves the carry flag alone)
   159  	xor	%edx, %edx
   160  	cmp	(%esp), %ebx		C carry set iff ebx < q0
   161  	adc	$-1, %edx		C mask
   162  	add	%edx, %edi		C q--
   163  	and	%edx, %eax		C d0 or 0
   164  	and	24(%esp), %edx		C d1 or 0
   165  	add	%eax, %ebp		C add the divisor back when the mask is all ones
   166  	adc	%edx, %ebx
   167  	cmp	24(%esp), %ebx		C high remainder limb >= d1: one more correction may be needed
   168  	jae	L(fix)
   169  L(bck):	mov	56(%esp), %edx		C qp
   170  	mov	%edi, (%edx, %ecx, 4)	C qp[index] = q
   171  	dec	%ecx
   172  	jns	L(loop)			C loop until the index goes below 0
   173  
   174  L(9):	mov	64(%esp), %esi		C up
   175  	mov	%ebp, (%esi)		C up[0] = low remainder limb
   176  	mov	%ebx, 4(%esi)		C up[1] = high remainder limb
   177  	mov	32(%esp), %eax		C return msl
   178  	add	$36, %esp		C drop the local stack slots
   179  	pop	%ebx
   180  	pop	%esi
   181  	pop	%edi
   182  	pop	%ebp
   183  	ret
   184  
   185  L(fix):	seta	%dl			C dl = (ebx > d1), flags from the cmp before jae
   186  	cmp	20(%esp), %ebp
   187  	setae	%al			C al = (ebp >= d0)
   188  	orb	%dl, %al		C "orb" form to placate Sun tools
   189  	je	L(bck)			C remainder is below the divisor after all: nothing to fix
   190  	inc	%edi			C q++
   191  	sub	20(%esp), %ebp		C ebx:ebp -= d1:d0
   192  	sbb	24(%esp), %ebx
   193  	jmp	L(bck)
   194  
   195  L(35):	sub	20(%esp), %ebp		C ebx:ebp -= d1:d0
   196  	sbb	24(%esp), %ebx
   197  	movl	$1, 32(%esp)		C msl = 1
   198  	jmp	L(8)
   199  EPILOGUE()