github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/pentium/com.asm (about)

     1  dnl  Intel Pentium mpn_com -- mpn ones complement.
     2  
     3  dnl  Copyright 1996, 2001, 2002, 2006 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  
    34  C P5: 1.75 cycles/limb
    35  
    36  
    37  NAILS_SUPPORT(0-31)	C NOTE(review): presumably declares the nail widths this file handles (the code xors with GMP_NUMB_MASK rather than -1) -- confirm against the macro definition
    38  
    39  
    40  C void mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size);
    41  C Store to dst[0..size-1] each limb of src[0..size-1] xored with GMP_NUMB_MASK.
    42  C This code is similar to mpn_copyi; basically it just has some "xorl
    43  C $GMP_NUMB_MASK"s inserted.
    44  C
    45  C Alternatives:
    46  C
    47  C On P55 some MMX code could be 1.25 c/l (8 limb unrolled) if src and dst
    48  C have the same alignment mod 8, but it doesn't seem worth the trouble for
    49  C just that case (there'd need to be some plain integer code available too
    50  C for the unaligned case).
    51  
    52  defframe(PARAM_SIZE,12)	C size, third argument
    53  defframe(PARAM_SRC, 8)	C src, second argument
    54  defframe(PARAM_DST, 4)	C dst, first argument
    55  
    56  	TEXT
    57  	ALIGN(8)
    58  PROLOGUE(mpn_com)
    59  deflit(`FRAME',0)
    60  
    61  	movl	PARAM_SRC, %eax		C eax = src
    62  	movl	PARAM_SIZE, %ecx	C ecx = size
    63  
    64  	pushl	%esi	FRAME_pushl()	C save esi, used as scratch below; NOTE(review): FRAME_pushl presumably keeps PARAM_DST valid after the push -- confirm
    65  	pushl	%edi	FRAME_pushl()	C save edi, used as scratch below
    66  
    67  	leal	(%eax,%ecx,4), %eax	C eax = &src[size], one past the last source limb
    68  	xorl	$-1, %ecx		C ecx = ~size = -size-1
    69  
    70  	movl	PARAM_DST, %edx		C edx = dst
    71  	addl	$8, %ecx		C ecx = -size+7, negative iff size >= 8
    72  
    73  	jns	L(end)			C fewer than 8 limbs: skip the unrolled loop
    74  
    75  	movl	(%edx), %esi		C fetch destination cache line
    76  	nop				C NOTE(review): presumably a filler for Pentium pairing -- confirm
    77  
    78  L(top):
    79  	C eax	&src[size]
    80  	C ebx
    81  	C ecx	counter, limbs, negative (equals 7 minus the limbs still to do)
    82  	C edx	dst, incrementing
    83  	C esi	scratch
    84  	C edi	scratch
    85  	C ebp
    86  
    87  	movl	28(%edx), %esi		C destination prefetch (value is overwritten by the next load)
    88  	addl	$32, %edx		C advance dst by 8 limbs; the stores below use negative offsets
    89  
    90  	movl	-28(%eax,%ecx,4), %esi	C limb 0 of this group of 8 (address is eax minus 4 times limbs remaining)
    91  	movl	-24(%eax,%ecx,4), %edi	C limb 1
    92  	xorl	$GMP_NUMB_MASK, %esi	C complement: xor with GMP_NUMB_MASK
    93  	xorl	$GMP_NUMB_MASK, %edi
    94  	movl	%esi, -32(%edx)		C store limbs 0 and 1
    95  	movl	%edi, -28(%edx)
    96  
    97  	movl	-20(%eax,%ecx,4), %esi	C limbs 2 and 3
    98  	movl	-16(%eax,%ecx,4), %edi
    99  	xorl	$GMP_NUMB_MASK, %esi
   100  	xorl	$GMP_NUMB_MASK, %edi
   101  	movl	%esi, -24(%edx)
   102  	movl	%edi, -20(%edx)
   103  
   104  	movl	-12(%eax,%ecx,4), %esi	C limbs 4 and 5
   105  	movl	-8(%eax,%ecx,4), %edi
   106  	xorl	$GMP_NUMB_MASK, %esi
   107  	xorl	$GMP_NUMB_MASK, %edi
   108  	movl	%esi, -16(%edx)
   109  	movl	%edi, -12(%edx)
   110  
   111  	movl	-4(%eax,%ecx,4), %esi	C limbs 6 and 7
   112  	movl	(%eax,%ecx,4), %edi
   113  	xorl	$GMP_NUMB_MASK, %esi
   114  	xorl	$GMP_NUMB_MASK, %edi
   115  	movl	%esi, -8(%edx)
   116  	movl	%edi, -4(%edx)
   117  
   118  	addl	$8, %ecx		C 8 limbs done, counter moves toward zero
   119  	js	L(top)			C still negative means at least 8 limbs remain
   120  
   121  
   122  L(end):
   123  	C eax	&src[size]
   124  	C ecx	0 to 7, representing respectively 7 to 0 limbs remaining
   125  	C edx	dst, next location to store
   126  
   127  	subl	$4, %ecx		C negative iff at least 4 limbs remain
   128  	nop				C leaves the flags from the subl intact for the jns
   129  
   130  	jns	L(no4)			C fewer than 4 remain: skip the 4-limb step
   131  
   132  	movl	-12(%eax,%ecx,4), %esi	C first two of the remaining limbs
   133  	movl	-8(%eax,%ecx,4), %edi
   134  	xorl	$GMP_NUMB_MASK, %esi
   135  	xorl	$GMP_NUMB_MASK, %edi
   136  	movl	%esi, (%edx)
   137  	movl	%edi, 4(%edx)
   138  
   139  	movl	-4(%eax,%ecx,4), %esi	C next two limbs
   140  	movl	(%eax,%ecx,4), %edi
   141  	xorl	$GMP_NUMB_MASK, %esi
   142  	xorl	$GMP_NUMB_MASK, %edi
   143  	movl	%esi, 8(%edx)
   144  	movl	%edi, 12(%edx)
   145  
   146  	addl	$16, %edx		C dst advanced past the 4 limbs just stored
   147  	addl	$4, %ecx		C undo the subl so ecx is back in the range 0 to 3
   148  L(no4):				C on both paths ecx is 0 to 3, meaning 3 to 0 limbs remaining
   149  
   150  	subl	$2, %ecx		C negative iff at least 2 limbs remain
   151  	nop				C leaves the flags from the subl intact for the jns
   152  
   153  	jns	L(no2)			C fewer than 2 remain: skip the 2-limb step
   154  
   155  	movl	-4(%eax,%ecx,4), %esi	C the two remaining limbs before any final odd one
   156  	movl	(%eax,%ecx,4), %edi
   157  	xorl	$GMP_NUMB_MASK, %esi
   158  	xorl	$GMP_NUMB_MASK, %edi
   159  	movl	%esi, (%edx)
   160  	movl	%edi, 4(%edx)
   161  
   162  	addl	$8, %edx		C dst advanced past the 2 limbs just stored
   163  	addl	$2, %ecx		C ecx becomes 0 or 1; Z set iff one limb remains
   164  L(no2):				C on both paths Z is set iff exactly one limb remains
   165  
   166  	popl	%edi			C restore edi; popl does not modify the flags tested next
   167  	jnz	L(done)			C nothing left to do
   168  
   169  	movl	-4(%eax), %ecx		C last limb, src[size-1]; ecx is no longer needed as counter
   170  
   171  	xorl	$GMP_NUMB_MASK, %ecx	C complement the final limb
   172  	popl	%esi			C restore esi
   173  
   174  	movl	%ecx, (%edx)		C store the final limb
   175  	ret
   176  
   177  L(done):
   178  	popl	%esi			C restore esi
   179  	ret
   180  
   181  EPILOGUE()