github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/pentium/aorsmul_1.asm (about)

     1  dnl  Intel Pentium mpn_addmul_1 and mpn_submul_1 -- multiply mpn by limb and add to or subtract from dst.
     2  
     3  dnl  Copyright 1992, 1994, 1996, 1999, 2000, 2002 Free Software Foundation,
     4  dnl  Inc.
     5  
     6  dnl  This file is part of the GNU MP Library.
     7  dnl
     8  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     9  dnl  it under the terms of either:
    10  dnl
    11  dnl    * the GNU Lesser General Public License as published by the Free
    12  dnl      Software Foundation; either version 3 of the License, or (at your
    13  dnl      option) any later version.
    14  dnl
    15  dnl  or
    16  dnl
    17  dnl    * the GNU General Public License as published by the Free Software
    18  dnl      Foundation; either version 2 of the License, or (at your option) any
    19  dnl      later version.
    20  dnl
    21  dnl  or both in parallel, as here.
    22  dnl
    23  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    24  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    25  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    26  dnl  for more details.
    27  dnl
    28  dnl  You should have received copies of the GNU General Public License and the
    29  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    30  dnl  see https://www.gnu.org/licenses/.
    31  
    32  include(`../config.m4')
    33  
    34  
    35  C P5: 14.0 cycles/limb
    36  
    37  
    38  ifdef(`OPERATION_addmul_1', `
    39        define(M4_inst,        addl)
    40        define(M4_function_1,  mpn_addmul_1)
    41        define(M4_function_1c, mpn_addmul_1c)
    42  
    43  ',`ifdef(`OPERATION_submul_1', `
    44        define(M4_inst,        subl)
    45        define(M4_function_1,  mpn_submul_1)
    46        define(M4_function_1c, mpn_submul_1c)
    47  
    48  ',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1
    49  ')')')
    50  
    51  MULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c mpn_submul_1 mpn_submul_1c)
    52  
    53  
    54  C mp_limb_t mpn_addmul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
    55  C                         mp_limb_t mult);
    56  C mp_limb_t mpn_addmul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
    57  C                          mp_limb_t mult, mp_limb_t carry);
    58  C
    59  C mp_limb_t mpn_submul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
    60  C                         mp_limb_t mult);
    61  C mp_limb_t mpn_submul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
    62  C                          mp_limb_t mult, mp_limb_t carry);
    63  C
    64  
    65  defframe(PARAM_CARRY,     20)
    66  defframe(PARAM_MULTIPLIER,16)
    67  defframe(PARAM_SIZE,      12)
    68  defframe(PARAM_SRC,       8)
    69  defframe(PARAM_DST,       4)
    70  
    71  	TEXT
    72  
    73  	ALIGN(8)
    74  PROLOGUE(M4_function_1c)
    75  deflit(`FRAME',0)
    76  
    77  	movl	PARAM_CARRY, %ecx
    78  	pushl	%esi		FRAME_pushl()
    79  
    80  	jmp	L(start_1c)
    81  
    82  EPILOGUE()
    83  
    84  
    85  	ALIGN(8)
    86  PROLOGUE(M4_function_1)
    87  deflit(`FRAME',0)
    88  
    89  	xorl	%ecx, %ecx
    90  	pushl	%esi		FRAME_pushl()
    91  
    92  L(start_1c):
    93  	movl	PARAM_SRC, %esi
    94  	movl	PARAM_SIZE, %eax
    95  
    96  	pushl	%edi		FRAME_pushl()
    97  	pushl	%ebx		FRAME_pushl()
    98  
    99  	movl	PARAM_DST, %edi
   100  	leal	-1(%eax), %ebx		C size-1
   101  
   102  	leal	(%esi,%eax,4), %esi
   103  	xorl	$-1, %ebx		C -size, and clear carry
   104  
   105  	leal	(%edi,%eax,4), %edi
   106  
   107  L(top):
   108  	C eax
   109  	C ebx	counter, negative
   110  	C ecx	carry
   111  	C edx
   112  	C esi	src end
   113  	C edi	dst end
   114  	C ebp
   115  
   116  	adcl	$0, %ecx
   117  	movl	(%esi,%ebx,4), %eax
   118  
   119  	mull	PARAM_MULTIPLIER
   120  
   121  	addl	%ecx, %eax
   122  	movl	(%edi,%ebx,4), %ecx
   123  
   124  	adcl	$0, %edx
   125  	M4_inst	%eax, %ecx
   126  
   127  	movl	%ecx, (%edi,%ebx,4)
   128  	incl	%ebx
   129  
   130  	movl	%edx, %ecx
   131  	jnz	L(top)
   132  
   133  
   134  	adcl	$0, %ecx
   135  	popl	%ebx
   136  
   137  	movl	%ecx, %eax
   138  	popl	%edi
   139  
   140  	popl	%esi
   141  
   142  	ret
   143  
   144  EPILOGUE()