github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/copyi.asm (about)

     1  dnl  x86 mpn_copyi -- copy limb vector, incrementing.
     2  
     3  dnl  Copyright 1999-2002 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  
    34  C     cycles/limb  startup (approx)
    35  C P5	  1.0	      35
    36  C P6	  0.75	      45
    37  C K6	  1.0	      30
    38  C K7	  1.3	      65
    39  C P4	  1.0	     120
    40  C
    41  C (Startup time includes some function call overheads.)
    42  
    43  
    44  C void mpn_copyi (mp_ptr dst, mp_srcptr src, mp_size_t size);
    45  C
    46  C Copy src,size to dst,size, working from low to high addresses.
    47  C
    48  C The code here is very generic and can be expected to be reasonable on all
    49  C the x86 family.
    50  C
    51  C P6 -  An MMX based copy was tried, but was found to be slower than a rep
    52  C       movs in all cases.  The fastest MMX found was 0.8 cycles/limb (when
    53  C       fully aligned).  A rep movs seems to have a startup time of about 15
    54  C       cycles, but doing something special for small sizes could lead to a
    55  C       branch misprediction that would destroy any saving.  For now a plain
    56  C       rep movs seems ok.
    57  C
    58  C K62 - We used to have a big chunk of code doing an MMX copy at 0.56 c/l if
    59  C       aligned or a 1.0 rep movs if not.  But that seemed excessive since
    60  C       it only got an advantage half the time, and even then only showed it
    61  C       above 50 limbs or so.
    62  
    63  defframe(PARAM_SIZE,12)	C size, the third argument of the prototype above
    64  defframe(PARAM_SRC, 8)	C src, the second argument of the prototype above
    65  defframe(PARAM_DST, 4)	C dst, the first argument of the prototype above
    66  deflit(`FRAME',0)	C NOTE(review): presumably the offsets above are bytes from esp at entry and FRAME 0 means nothing has been pushed - confirm against the defframe definition
    67  
    68  	TEXT
    69  	ALIGN(32)	C NOTE(review): presumably aligns the function entry that follows - confirm against the ALIGN definition
    70  
    71  	C eax	saved esi
    72  	C ebx
    73  	C ecx	counter
    74  	C edx	saved edi
    75  	C esi	src
    76  	C edi	dst
    77  	C ebp
    78  
    79  PROLOGUE(mpn_copyi)
    80  	C Copy PARAM_SIZE limbs from PARAM_SRC to PARAM_DST with one rep movsl, low to high addresses.
    81  	movl	PARAM_SIZE, %ecx	C ecx = limb count, consumed by rep
    82  	movl	%esi, %eax	C keep the caller esi in eax, it is restored before ret
    83  
    84  	movl	PARAM_SRC, %esi	C esi = source pointer read by movsl
    85  	movl	%edi, %edx	C keep the caller edi in edx, it is restored before ret
    86  
    87  	movl	PARAM_DST, %edi	C edi = destination pointer written by movsl
    88  
    89  	cld	C better safe than sorry, see mpn/x86/README
    90  	C Direction flag is clear here, so each movsl steps esi and edi upwards by 4 bytes. A zero count in ecx copies nothing.
    91  	rep
    92  	movsl
    93  
    94  	movl	%eax, %esi	C restore the caller esi
    95  	movl	%edx, %edi	C restore the caller edi
    96  
    97  	ret	C void function, eax and edx are left holding the saved esi and edi values
    98  
    99  EPILOGUE()