github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/core2/lshiftc.asm (about)

     1  dnl  x86-64 mpn_lshiftc optimized for "Core 2".
     2  
     3  dnl  Copyright 2007, 2009, 2011, 2012 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  
    34  C	     cycles/limb
    35  C AMD K8,K9	 ?
    36  C AMD K10	 ?
    37  C Intel P4	 ?
    38  C Intel core2	 1.5
    39  C Intel NHM	 2.25	(up to about n = 260, then 1.875)
    40  C Intel SBR	 2.25
    41  C Intel atom	 ?
    42  C VIA nano	 ?
    43  
    44  
    45  C INPUT PARAMETERS
        C Symbolic names for the four argument registers used by mpn_lshiftc.
    46  define(`rp',	`%rdi')	C result pointer; limbs are stored through it
    47  define(`up',	`%rsi')	C source pointer; limbs are loaded through it
    48  define(`n',	`%rdx')	C number of 8-byte limbs in each operand
    49  define(`cnt',	`%rcx')	C shift count; only R8(cnt) is used by the shifts
    50  
        C NOTE(review): ABI_SUPPORT is defined elsewhere; presumably it declares
        C which calling conventions this file can be built for -- confirm.
    51  ABI_SUPPORT(DOS64)
    52  ABI_SUPPORT(STD64)
    53  
    54  ASM_START()
    55  	TEXT
    56  	ALIGN(16)
        C mpn_lshiftc: shift the n-limb operand at up left by cnt bits and store
        C the bitwise complement of every shifted limb at rp.  The bits shifted
        C out of the most significant limb are left in rax, NOT complemented.
        C
        C Limbs are handled from the most significant one (highest address)
        C downwards.  The main loop is unrolled four ways; the entry code
        C dispatches on n mod 4, preloads two or three limbs, biases rp and up
        C so that the fixed displacements of the loop line up, and then jumps to
        C the matching stage L(top), L(10), L(01) or L(00).  The final three
        C limbs are always finished at L(end), except for n = 1 and n = 2 which
        C have their own exits L(le1) and L(le2).
        C
        C Registers: r8-r11 form a rotating window of source limbs, rax holds
        C the return value.  No stack access appears in the code visible here
        C (FUNC_ENTRY and FUNC_EXIT are macros defined elsewhere).
        C
        C n = 0 is not handled: it would take the L(b00) path and access limbs
        C outside the operands.
        C NOTE(review): presumably callers guarantee 1 <= cnt <= 63 -- confirm.
    57  PROLOGUE(mpn_lshiftc)
    58  	FUNC_ENTRY(4)
    59  	lea	-8(rp,n,8), rp		C rp = address of rp[n-1]
    60  	lea	-8(up,n,8), up		C up = address of up[n-1]
    61  
    62  	mov	R32(%rdx), R32(%rax)	C eax = low 32 bits of n
    63  	and	$3, R32(%rax)		C eax = n mod 4
    64  	jne	L(nb00)			C taken when n mod 4 != 0
    65  L(b00):	C n = 4, 8, 12, ...
    66  	mov	(up), %r10		C r10 = up[n-1]
    67  	mov	-8(up), %r11		C r11 = up[n-2]
    68  	xor	R32(%rax), R32(%rax)	C clear rax before shifting into it
    69  	shld	R8(cnt), %r10, %rax	C rax = bits shifted out of up[n-1]
    70  	mov	-16(up), %r8		C r8 = up[n-3]
    71  	lea	24(rp), rp		C make -24(rp) at L(00) address rp[n-1]
    72  	sub	$4, n
    73  	jmp	L(00)
    74  
    75  L(nb00):C n mod 4 is 1, 2 or 3 here, with that value in eax
    76  	cmp	$2, R32(%rax)
    77  	jae	L(nb01)			C taken when n mod 4 >= 2
    78  L(b01):	mov	(up), %r9		C n = 1, 5, 9, ...; r9 = up[n-1]
    79  	xor	R32(%rax), R32(%rax)	C clear rax before shifting into it
    80  	shld	R8(cnt), %r9, %rax	C rax = bits shifted out of up[n-1]
    81  	sub	$2, n
    82  	jb	L(le1)			C borrow only when n was 1
    83  	mov	-8(up), %r10		C r10 = up[n-2]
    84  	mov	-16(up), %r11		C r11 = up[n-3]
    85  	lea	-8(up), up		C up = address of up[n-2]
    86  	lea	16(rp), rp		C make -16(rp) at L(01) address rp[n-1]
    87  	jmp	L(01)
    88  L(le1):	shl	R8(cnt), %r9		C n = 1: single limb, zeros shifted in
    89  	not	%r9
    90  	mov	%r9, (rp)
    91  	FUNC_EXIT()
    92  	ret
    93  
    94  L(nb01):C n mod 4 is 2 or 3; flags are still those of the cmp $2 above
    95  	jne	L(b11)			C taken when n mod 4 = 3
    96  L(b10):	mov	(up), %r8		C n = 2, 6, 10, ...; r8 = up[n-1]
    97  	mov	-8(up), %r9		C r9 = up[n-2]
    98  	xor	R32(%rax), R32(%rax)	C clear rax before shifting into it
    99  	shld	R8(cnt), %r8, %rax	C rax = bits shifted out of up[n-1]
   100  	sub	$3, n
   101  	jb	L(le2)			C borrow only when n was 2
   102  	mov	-16(up), %r10		C r10 = up[n-3]
   103  	lea	-16(up), up		C up = address of up[n-3]
   104  	lea	8(rp), rp		C make -8(rp) at L(10) address rp[n-1]
   105  	jmp	L(10)
   106  L(le2):	shld	R8(cnt), %r9, %r8	C n = 2: high limb takes bits from low limb
   107  	not	%r8
   108  	mov	%r8, (rp)
   109  	shl	R8(cnt), %r9		C low limb gets zeros shifted in
   110  	not	%r9
   111  	mov	%r9, -8(rp)
   112  	FUNC_EXIT()
   113  	ret
   114  
   115  	ALIGN(16)			C performance critical!
   116  L(b11):	C n = 3, 7, 11, ...
   117  	mov	(up), %r11		C r11 = up[n-1]
   118  	mov	-8(up), %r8		C r8 = up[n-2]
   119  	xor	R32(%rax), R32(%rax)	C clear rax before shifting into it
   120  	shld	R8(cnt), %r11, %rax	C rax = bits shifted out of up[n-1]
   121  	mov	-16(up), %r9		C r9 = up[n-3]
   122  	lea	-24(up), up		C up = address of up[n-4]
   123  	sub	$4, n
   124  	jb	L(end)			C borrow only when n was 3
   125  
        	C Main loop, four result limbs per pass.  Each stage merges one
        	C limb with the top bits of the next lower limb (shld),
        	C complements and stores it, and loads a limb that a later
        	C stage will consume.  The limb roles rotate through r11, r8,
        	C r9 and r10.
   126  	ALIGN(16)
   127  L(top):	shld	R8(cnt), %r8, %r11
   128  	mov	(up), %r10
   129  	not	%r11
   130  	mov	%r11, (rp)
   131  L(10):	shld	R8(cnt), %r9, %r8
   132  	mov	-8(up), %r11
   133  	not	%r8
   134  	mov	%r8, -8(rp)
   135  L(01):	shld	R8(cnt), %r10, %r9
   136  	mov	-16(up), %r8
   137  	not	%r9
   138  	mov	%r9, -16(rp)
   139  L(00):	shld	R8(cnt), %r11, %r10
   140  	mov	-24(up), %r9
   141  	not	%r10
   142  	mov	%r10, -24(rp)
   143  	add	$-32, up		C step both pointers down by four limbs
   144  	lea	-32(rp), rp
   145  	sub	$4, n			C sets the carry tested by the jnc below
   146  	jnc	L(top)			C repeat until the subtraction borrows
   147  
        	C Wind down: r11, r8 and r9 hold the three least significant
        	C source limbs, from higher to lower, and rp addresses the
        	C result limb that belongs to r11.
   148  L(end):	shld	R8(cnt), %r8, %r11
   149  	not	%r11
   150  	mov	%r11, (rp)
   151  	shld	R8(cnt), %r9, %r8
   152  	not	%r8
   153  	mov	%r8, -8(rp)
   154  	shl	R8(cnt), %r9		C lowest limb gets zeros shifted in
   155  	not	%r9			C so its low cnt bits end up as ones
   156  	mov	%r9, -16(rp)
   157  	FUNC_EXIT()
   158  	ret
   159  EPILOGUE()