github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/mulx/mul_1.asm (about)

     1  dnl  AMD64 mpn_mul_1 for CPUs with mulx.
     2  
     3  dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  C	     cycles/limb
    34  C AMD K8,K9	 -
    35  C AMD K10	 -
    36  C AMD bd1	 -
    37  C AMD bd2	 ?
    38  C AMD bobcat	 -
    39  C AMD jaguar	 ?
    40  C Intel P4	 -
    41  C Intel PNR	 -
    42  C Intel NHM	 -
    43  C Intel SBR	 -
    44  C Intel HWL	 ?
    45  C Intel BWL	 ?
    46  C Intel atom	 -
    47  C VIA nano	 -
    48  
    49  define(`rp',      `%rdi')   C result limbs are stored through this pointer; DOS64 arg reg: rcx
    50  define(`up',      `%rsi')   C source limbs are read through this pointer; DOS64 arg reg: rdx
    51  define(`n_param', `%rdx')   C limb count as passed on entry; DOS64 arg reg: r8
    52  define(`v0_param',`%rcx')   C multiplier limb as passed on entry; DOS64 arg reg: r9
    53  C The entry code exchanges v0_param and v0, which leaves the count in rcx and the multiplier in rdx.
    54  define(`n',       `%rcx')   C working count, negated and used as an index; rcx is the register jrcxz tests
    55  define(`v0',      `%rdx')   C multiplier limb; rdx is the implicit source operand of mulx
    56  C NOTE(review): the IFDOS overrides below leave n_param and v0_param unchanged, while mulx and jrcxz implicitly use rdx and rcx - confirm the DOS64 variant is meant to work.
    57  IFDOS(`	define(`up', ``%rsi'')	') dnl
    58  IFDOS(`	define(`rp', ``%rcx'')	') dnl
    59  IFDOS(`	define(`v0', ``%r9'')	') dnl
    60  IFDOS(`	define(`r9', ``rdi'')	') dnl
    61  IFDOS(`	define(`n',  ``%r8'')	') dnl
    62  IFDOS(`	define(`r8', ``r11'')	') dnl
    63  C mpn_mul_1: rp[0..n-1] = low limbs of up[0..n-1] * v0, final high limb left in rax. mpn_mul_1c: same, plus the carry-in limb passed in r8.
    64  ASM_START()
    65  	TEXT
    66  	ALIGN(16)
    67  PROLOGUE(mpn_mul_1c)
    68  	jmp	L(ent)			C carry-in limb is already in r8, so skip the clearing xor
    69  EPILOGUE()
    70  PROLOGUE(mpn_mul_1)
    71  	xor	R32(%r8), R32(%r8)	C carry-in limb
    72  L(ent):	mov	(up), %r9		C r9 = up[0], loaded before up is advanced
    73  C Save the callee-saved registers that are used as scratch below.
    74  	push	%rbx
    75  	push	%r12
    76  	push	%r13
    77  C Bias both pointers by the limb count so that a negative index counting up to zero addresses the limbs.
    78  	lea	(up,n_param,8), up	C up += 8*n, one limb past the end of the source
    79  	lea	-32(rp,n_param,8), rp	C rp += 8*n - 32, pairs with the 0..24 store offsets
    80  	mov	R32(n_param), R32(%rax)	C copy of the low count bits for the mod 4 dispatch
    81  	xchg	v0_param, v0		C FIXME: is this insn fast?
    82  C With the default defines the xchg leaves the multiplier in rdx (implicit mulx operand) and the count in rcx.
    83  	neg	n			C n = -n, the loop index
    84  C Dispatch on n mod 4 to the matching entry block of the 4-way unrolled loop.
    85  	and	$3, R8(%rax)		C al = n mod 4
    86  	jz	L(b0)			C n mod 4 = 0
    87  	cmp	$2, R8(%rax)
    88  	jz	L(b2)			C n mod 4 = 2
    89  	jg	L(b3)			C n mod 4 = 3, else fall through with n mod 4 = 1
    90  C Entry blocks: compute the leading products, then join the loop or, when no limbs remain, the wind-down code.
    91  L(b1):	mov	%r8, %r12		C carry-in limb; mulx below reuses r8 and L(wd1) adds r12
    92  	mulx	%r9, %rbx, %rax		C rax:rbx = up[0] * v0
    93  	sub	$-1, n			C n += 1; sub (not add) leaves CF clear when n reaches zero
    94  	jz	L(wd1)			C n was 1
    95  	mulx	(up,n,8), %r9, %r8	C r8:r9 = up[1] * v0
    96  	mulx	8(up,n,8), %r11, %r10	C r10:r11 = up[2] * v0
    97  	add	%r12, %rbx		C add carry-in to the lowest limb, starts the carry chain
    98  	jmp	L(lo1)
    99  
   100  L(b3):	mulx	%r9, %r11, %r10		C r10:r11 = up[0] * v0
   101  	mulx	8(up,n,8), %r13, %r12	C r12:r13 = up[1] * v0
   102  	mulx	16(up,n,8), %rbx, %rax	C rax:rbx = up[2] * v0
   103  	sub	$-3, n			C n += 3, CF clear when n reaches zero
   104  	jz	L(wd3)			C n was 3; L(wd3) adds the carry-in still held in r8
   105  	add	%r8, %r11		C add carry-in to the lowest limb, starts the carry chain
   106  	jmp	L(lo3)
   107  
   108  L(b2):	mov	%r8, %r10		C carry-in limb
   109  	mulx	%r9, %r13, %r12		C r12:r13 = up[0] * v0
   110  	mulx	8(up,n,8), %rbx, %rax	C rax:rbx = up[1] * v0
   111  	sub	$-2, n			C n += 2, CF clear when n reaches zero
   112  	jz	L(wd2)			C n was 2; L(wd2) adds the carry-in held in r10
   113  	mulx	(up,n,8), %r9, %r8	C r8:r9 = up[2] * v0
   114  	add	%r10, %r13		C add carry-in to the lowest limb, starts the carry chain
   115  	jmp	L(lo2)
   116  
   117  L(b0):	mov	%r8, %rax		C carry-in limb
   118  	mulx	%r9, %r9, %r8		C r8:r9 = up[0] * v0
   119  	mulx	8(up,n,8), %r11, %r10	C r10:r11 = up[1] * v0
   120  	mulx	16(up,n,8), %r13, %r12	C r12:r13 = up[2] * v0
   121  	add	%rax, %r9		C add carry-in to the lowest limb, starts the carry chain
   122  	jmp	L(lo0)
   123  C Main loop, four limbs per pass: products rotate through r8:r9, r10:r11, r12:r13 and rax:rbx (high:low); mulx, lea, jrcxz and mov leave CF alone, so the adc chain survives.
   124  L(top):	jrcxz	L(end)			C leave once the index in rcx has reached zero
   125  	adc	%r8, %r11		C low half + previous high half + carry
   126  	mov	%r9, (rp,n,8)		C store a finished limb
   127  L(lo3):	mulx	(up,n,8), %r9, %r8
   128  	adc	%r10, %r13
   129  	mov	%r11, 8(rp,n,8)
   130  L(lo2):	mulx	8(up,n,8), %r11, %r10
   131  	adc	%r12, %rbx
   132  	mov	%r13, 16(rp,n,8)
   133  L(lo1):	mulx	16(up,n,8), %r13, %r12
   134  	adc	%rax, %r9
   135  	mov	%rbx, 24(rp,n,8)
   136  L(lo0):	mulx	24(up,n,8), %rbx, %rax
   137  	lea	4(n), n			C advance the index by four limbs without touching the flags
   138  	jmp	L(top)
   139  C Wind-down: store and carry-propagate the products still in registers; wd3, wd2 and wd1 are also entered directly when n is 3, 2 or 1.
   140  L(end):	mov	%r9, (rp)
   141  L(wd3):	adc	%r8, %r11
   142  	mov	%r11, 8(rp)
   143  L(wd2):	adc	%r10, %r13
   144  	mov	%r13, 16(rp)
   145  L(wd1):	adc	%r12, %rbx
   146  	adc	n, %rax			C n is zero here, so this only folds the last carry into the returned limb
   147  	mov	%rbx, 24(rp)		C most significant stored limb, rp[n-1]
   148  C Restore the saved registers in reverse push order.
   149  	pop	%r13
   150  	pop	%r12
   151  	pop	%rbx
   152  	ret
   153  EPILOGUE()
   154  ASM_END()