github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/mod_34lsub1.asm (about)

     1  dnl  Generic x86 mpn_mod_34lsub1 -- mpn remainder modulo 2^24-1.
     2  
     3  dnl  Copyright 2000-2002, 2004 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  
    34  C      cycles/limb
    35  C P5	  3.0
    36  C P6	  3.66
    37  C K6	  3.0
    38  C K7	  1.3
    39  C P4	  9
    40  
    41  
    42  C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
    43  C Returns in eax a value congruent to the size 32-bit limbs at src modulo 2^24-1; the value is not fully reduced (size 1 returns src[0] unchanged).
    44  C NOTE(review): size 0 is not special-cased, src[0] is still loaded; presumably callers pass size >= 1.
    45  defframe(PARAM_SIZE, 8)
    46  defframe(PARAM_SRC,  4)
    47  
    48  dnl  re-use parameter space (src is already in edx when ebx is stored there)
    49  define(SAVE_EBX, `PARAM_SRC')
    50  
    51  	TEXT
    52  	ALIGN(16)
    53  PROLOGUE(mpn_mod_34lsub1)
    54  deflit(`FRAME',0)
    55  
    56  	movl	PARAM_SIZE, %ecx		C ecx = size
    57  	movl	PARAM_SRC, %edx		C edx = src
    58  
    59  	subl	$2, %ecx		C ecx = size-2, flags pick the case below
    60  	ja	L(three_or_more)		C size > 2 (unsigned compare)
    61  
    62  	movl	(%edx), %eax		C eax = src[0], movl leaves the subl flags intact
    63  	jb	L(one)		C borrow from the subl: size < 2, return src[0] as is
    64  	C size == 2: result is src[0] + src[1]*2^32 folded with 2^24 == 1 mod 2^24-1
    65  	movl	4(%edx), %ecx		C ecx = src[1]
    66  	movl	%eax, %edx
    67  	shrl	$24, %eax		C src[0] high 8 bits
    68  
    69  	andl	$0xFFFFFF, %edx		C src[0] low 24 bits
    70  	addl	%edx, %eax
    71  	movl	%ecx, %edx
    72  
    73  	andl	$0xFFFF, %ecx
    74  	shrl	$16, %edx		C src[1] high 16 bits, weight 2^48 == 1
    75  	addl	%edx, %eax
    76  
    77  	shll	$8, %ecx		C src[1] low 16 bits, weight 2^32 == 2^8
    78  	addl	%ecx, %eax
    79  
    80  L(one):
    81  	ret
    82  
    83  
    84  L(three_or_more):
    85  	C eax
    86  	C ebx
    87  	C ecx	size-2
    88  	C edx	src
    89  	C esi
    90  	C edi
    91  	C ebp
    92  
    93  	movl	%ebx, SAVE_EBX		C save ebx in the src slot, and arrange 16-byte loop alignment
    94  	xorl	%ebx, %ebx		C acc 1mod3 = 0
    95  
    96  	pushl	%esi	FRAME_pushl()
    97  	xorl	%esi, %esi		C acc 2mod3 = 0
    98  
    99  	pushl	%edi	FRAME_pushl()
   100  	xorl	%eax, %eax		C acc 0mod3 = 0, and clear carry flag
   101  
   102  
   103  	C offset 0x40 here
   104  L(top):
   105  	C eax	acc 0mod3
   106  	C ebx	acc 1mod3
   107  	C ecx	counter, limbs
   108  	C edx	src
   109  	C esi	acc 2mod3
   110  	C edi
   111  	C ebp
   112  	C leal and decl do not write CF, so the adcl carry chain runs across iterations
   113  	leal	12(%edx), %edx		C advance src by three limbs
   114  	leal	-2(%ecx), %ecx		C counter -= 2, the decl below makes it 3 per pass
   115  
   116  	adcl	-12(%edx), %eax
   117  	adcl	-8(%edx), %ebx
   118  	adcl	-4(%edx), %esi
   119  
   120  	decl	%ecx
   121  	jg	L(top)		C loop while at least three limbs remain
   122  
   123  
   124  	C ecx is -2, -1 or 0 representing 0, 1 or 2 more limbs, respectively
   125  
   126  	movl	$0xFFFFFFFF, %edi		C mask -1: a carry out of acc 2mod3 counts as 1
   127  	incl	%ecx		C incl and decl keep CF for the adcl and sbbl that follow
   128  	js	L(combine)		C no limbs left
   129  
   130  	adcl	(%edx), %eax
   131  	movl	$0xFFFFFF00, %edi		C mask -2^8: a carry out of acc 0mod3 counts as 2^8
   132  	decl	%ecx
   133  	js	L(combine)		C one limb was left
   134  
   135  	adcl	4(%edx), %ebx
   136  	movl	$0xFFFF0000, %edi		C mask -2^16: a carry out of acc 1mod3 counts as 2^16
   137  
   138  
   139  L(combine):
   140  	C eax	acc 0mod3
   141  	C ebx	acc 1mod3
   142  	C ecx
   143  	C edx
   144  	C esi	acc 2mod3
   145  	C edi	mask
   146  	C ebp
   147  	C weights mod 2^24-1: acc 0mod3 is 1, acc 1mod3 is 2^8, acc 2mod3 is 2^16
   148  	sbbl	%ecx, %ecx		C carry: -1 if CF is set, else 0
   149  	movl	%eax, %edx		C 0mod3
   150  
   151  	shrl	$24, %eax		C 0mod3 high
   152  	andl	%edi, %ecx		C carry masked
   153  
   154  	subl	%ecx, %eax		C apply carry (subtracting the negative mask adds its weight)
   155  	movl	%ebx, %edi		C 1mod3
   156  
   157  	shrl	$16, %ebx		C 1mod3 high
   158  	andl	$0x00FFFFFF, %edx	C 0mod3 low
   159  
   160  	addl	%edx, %eax		C apply 0mod3 low
   161  	andl	$0xFFFF, %edi
   162  
   163  	shll	$8, %edi		C 1mod3 low
   164  	addl	%ebx, %eax		C apply 1mod3 high
   165  
   166  	addl	%edi, %eax		C apply 1mod3 low
   167  	movl	%esi, %edx		C 2mod3
   168  
   169  	shrl	$8, %esi		C 2mod3 high
   170  	andl	$0xFF, %edx		C 2mod3 low
   171  
   172  	shll	$16, %edx		C 2mod3 low
   173  	addl	%esi, %eax		C apply 2mod3 high
   174  
   175  	addl	%edx, %eax		C apply 2mod3 low
   176  	popl	%edi	FRAME_popl()
   177  
   178  	movl	SAVE_EBX, %ebx		C restore ebx from the parameter slot
   179  	popl	%esi	FRAME_popl()
   180  
   181  	ret
   182  
   183  EPILOGUE()