github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/pentium/mod_34lsub1.asm (about)

     1  dnl  Intel P5 mpn_mod_34lsub1 -- mpn remainder modulo 2**24-1.
     2  
     3  dnl  Copyright 2000-2002 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  
    34  C P5: 1.66 cycles/limb
    35  
    36  
    37  C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
    38  C Returns in eax a value congruent to {src,size} modulo 2^24-1; it is a folded sum, not a fully reduced remainder.
    39  C Sizes 1 and 2 are handled inline, size >= 3 goes through a three-limbs-per-pass loop; size 0 is not special-cased and takes the size 1 path.
    40  defframe(PARAM_SIZE, 8)
    41  defframe(PARAM_SRC,  4)
    42  
    43  	TEXT
    44  	ALIGN(16)
    45  PROLOGUE(mpn_mod_34lsub1)
    46  deflit(`FRAME',0)
    47  	C Load both arguments, then dispatch on size using the flags of one subtract.
    48  	movl	PARAM_SIZE, %ecx	C ecx = size
    49  	movl	PARAM_SRC, %edx		C edx = src
    50  
    51  	subl	$2, %ecx		C ecx = size-2, flags feed both branches below
    52  	ja	L(three_or_more)	C size-2 > 0 unsigned, that is size >= 3
    53  
    54  	movl	(%edx), %eax		C eax = src[0], movl leaves the flags intact
    55  	jne	L(one)			C ZF clear means size was not 2, so return src[0] unchanged
    56  
    57  	C size 2: eax = src[0] low 24 + src[0] high 8 + src[1] high 16 + (src[1] low 16 << 8)
    58  	movl	4(%edx), %ecx		C ecx = src[1]
    59  	movl	%eax, %edx		C edx = copy of src[0]
    60  
    61  	shrl	$24, %edx		C src[0] high 8 bits, weight 2^24 = 1 mod 2^24-1
    62  	andl	$0xFFFFFF, %eax		C src[0] low 24 bits
    63  
    64  	addl	%edx, %eax
    65  	movl	%ecx, %edx		C edx = copy of src[1]
    66  
    67  	shrl	$16, %ecx		C src[1] high 16 bits, weight 2^48 = 1 mod 2^24-1
    68  	andl	$0xFFFF, %edx		C src[1] low 16 bits
    69  
    70  	shll	$8, %edx		C src[1] low 16 bits at weight 2^32 = 2^8 mod 2^24-1
    71  	addl	%ecx, %eax
    72  
    73  	addl	%edx, %eax
    74  
    75  L(one):
    76  	ret				C result is in eax
    77  	C size >= 3: limbs are summed by index mod 3 into three accumulators, because modulo
    78  	C 2^24-1 the limb weights 2^0, 2^32 = 2^8, 2^64 = 2^16 repeat with period three (2^96 = 1).
    79  L(three_or_more):
    80  	C eax
    81  	C ebx
    82  	C ecx	size-2
    83  	C edx	src
    84  	C esi
    85  	C edi
    86  	C ebp
    87  	C Save ebx, esi, edi and ebp; they are restored before the final ret.
    88  	pushl	%ebx	FRAME_pushl()
    89  	pushl	%esi	FRAME_pushl()
    90  
    91  	pushl	%edi	FRAME_pushl()
    92  	pushl	%ebp	FRAME_pushl()
    93  	C Zero the three accumulators; the last xorl also leaves CF = 0 for the first adcl.
    94  	xorl	%esi, %esi		C 0mod3
    95  	xorl	%edi, %edi		C 1mod3
    96  
    97  	xorl	%ebp, %ebp		C 2mod3, and clear carry
    98  
    99  L(top):
   100  	C eax	scratch
   101  	C ebx	scratch
   102  	C ecx	counter, limbs
   103  	C edx	src
   104  	C esi	0mod3
   105  	C edi	1mod3
   106  	C ebp	2mod3
   107  	C One adcl chain per pass; the carry out of ebp stays in CF and enters esi on the next pass.
   108  	movl	(%edx), %eax
   109  	movl	4(%edx), %ebx
   110  
   111  	adcl	%eax, %esi		C 0mod3 += src[0] + carry in
   112  	movl	8(%edx), %eax
   113  
   114  	adcl	%ebx, %edi		C 1mod3 += src[1] + carry
   115  	leal	12(%edx), %edx		C src += 3 limbs, leal leaves the flags intact
   116  
   117  	adcl	%eax, %ebp		C 2mod3 += src[2] + carry
   118  	leal	-2(%ecx), %ecx		C counter -= 2 without touching CF
   119  
   120  	decl	%ecx			C counter -= 1 more; decl sets SF/ZF/OF but keeps CF
   121  	jg	L(top)			C loop while counter > 0 (signed)
   122  
   123  
   124  	C ecx is -2, -1 or 0, representing 0, 1 or 2 more limbs, respectively
   125  	C Add the leftover limbs; ebx tracks the weight mask for whichever carry is still pending in CF.
   126  	movl	$0xFFFFFFFF, %ebx	C mask: a carry out of 2mod3 weighs 2^96 = 1
   127  	incl	%ecx			C sign set only if ecx was -2; incl keeps CF
   128  
   129  	js	L(combine)		C 0 more
   130  
   131  	movl	(%edx), %eax
   132  	movl	$0xFFFFFF00, %ebx	C mask: a carry out of 0mod3 weighs 2^32 = 2^8
   133  
   134  	adcl	%eax, %esi		C 0mod3 += leftover limb + pending carry
   135  	decl	%ecx			C sign set only if exactly one limb was left; CF kept
   136  
   137  	js	L(combine)		C 1 more
   138  
   139  	movl	4(%edx), %eax
   140  	movl	$0xFFFF0000, %ebx	C mask: a carry out of 1mod3 weighs 2^64 = 2^16
   141  
   142  	adcl	%eax, %edi		C 1mod3 += last limb + pending carry
   143  
   144  
   145  
   146  L(combine):
   147  	C eax
   148  	C ebx	mask
   149  	C ecx
   150  	C edx
   151  	C esi	0mod3
   152  	C edi	1mod3
   153  	C ebp	2mod3
   154  	C Fold the pending carry and the three accumulators into eax, splitting each accumulator where its bits cross 2^24.
   155  	sbbl	%ecx, %ecx		C carry: ecx = -1 if CF set, else 0
   156  	movl	%esi, %eax		C 0mod3
   157  
   158  	andl	%ebx, %ecx		C masked for position: ecx = -weight or 0
   159  	andl	$0xFFFFFF, %eax		C 0mod3 low
   160  
   161  	shrl	$24, %esi		C 0mod3 high
   162  	subl	%ecx, %eax		C apply carry: subtracting -weight adds the weight
   163  
   164  	addl	%esi, %eax		C apply 0mod3
   165  	movl	%edi, %ebx		C 1mod3
   166  
   167  	shrl	$16, %edi		C 1mod3 high
   168  	andl	$0x0000FFFF, %ebx	C 1mod3 low 16 bits
   169  
   170  	shll	$8, %ebx		C 1mod3 low
   171  	addl	%edi, %eax		C apply 1mod3 high
   172  
   173  	addl	%ebx, %eax		C apply 1mod3 low
   174  	movl	%ebp, %ebx		C 2mod3
   175  
   176  	shrl	$8, %ebp		C 2mod3 high
   177  	andl	$0xFF, %ebx		C 2mod3 low 8 bits
   178  
   179  	shll	$16, %ebx		C 2mod3 low
   180  	addl	%ebp, %eax		C apply 2mod3 high
   181  
   182  	addl	%ebx, %eax		C apply 2mod3 low
   183  	C Restore the saved registers in reverse push order; the result stays in eax.
   184  	popl	%ebp
   185  	popl	%edi
   186  
   187  	popl	%esi
   188  	popl	%ebx
   189  
   190  	ret
   191  
   192  EPILOGUE()