github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/addaddmul_1msb0.asm (about)

     1  dnl  AMD64 mpn_addaddmul_1msb0, R = Au + Bv, u,v < 2^63.
     2  
     3  dnl  Copyright 2008 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  C	     cycles/limb
    34  C AMD K8,K9	 2.167
    35  C AMD K10	 2.167
    36  C Intel P4	12.0
    37  C Intel core2	 4.0
    38  C Intel corei	 ?
    39  C Intel atom	 ?
    40  C VIA nano	 ?
    41  
    42  C TODO
    43  C  * Perhaps handle various n mod 3 sizes better.  The code now is too large.
    44  
    45  C INPUT PARAMETERS
        C The six registers below are, in order, the System V AMD64 integer
        C argument registers.  The function body refers to u0 and v0 by their
        C raw register names (%r8 and %r9) rather than through these macros.
    46  define(`rp',	`%rdi')		C result limbs are stored through this pointer
    47  define(`ap',	`%rsi')		C limbs loaded from here are multiplied by %r8
    48  define(`bp_param', `%rdx')	C second source pointer as passed in
    49  define(`n',	`%rcx')		C limb count, used as a scale-8 index
    50  define(`u0',	`%r8')		C multiplier applied to the ap limbs
    51  define(`v0',	`%r9')		C multiplier applied to the bp limbs
    52  
    53  
        C mul writes %rdx, so the body keeps the second source pointer in
        C %rbp instead of leaving it in bp_param.
    54  define(`bp', `%rbp')
    55  
    56  ASM_START()
    57  	TEXT
    58  	ALIGN(16)
        C mpn_addaddmul_1msb0: store the n low limbs of A*u0 + B*v0 at rp and
        C return the most significant limb of the sum in %rax.
        C
        C Register roles in the code below:
        C   %rdi rp, %rsi ap, %rbp bp (taken from %rdx), %rcx n
        C   %r8  u0, %r9 v0
        C   %r10, %r11, %r12   rotating accumulator limbs
        C   %rdx:%rax          mul operand and 128-bit product
        C   %rbp and %r12 are pushed on entry and popped before every ret.
        C
        C Indexing: ap, bp and rp are advanced past their last limb and n is
        C negated, so (ptr,n,8) addresses limbs with a negative index that
        C rises towards zero.  The main loop is unrolled to produce three
        C result limbs per iteration; the three tails after L(end) finish the
        C last 3, 2 or 1 limbs using fixed negative displacements.
        C
        C State on arrival at L(top) and at L(end):
        C   %r12 = low limb and %r10 = high limb of (carry-in + a*u0) for the
        C   pending limb, and %rax = the matching b limb, not yet multiplied.
        C
        C Carries: with u0 and v0 below 2^63, as stated in the file header,
        C a*u0 + b*v0 + carry-in is at most (2^64-1)^2, which fits in two
        C limbs.  That is why each adc into a high limb needs no further
        C carry handling.  Registers are cleared with mov $0 because mov
        C leaves CF untouched between an add and the adc that consumes it.
        C R32() is defined outside this file; presumably it names the 32-bit
        C form of the register, whose write also clears the upper half.
        C
        C NOTE(review): n = 0 is not treated specially.  It would reach the
        C one-limb tail, having loaded (ap) and (bp) and then storing to
        C -8(rp).  Presumably callers guarantee n >= 1 -- confirm.
    59  PROLOGUE(mpn_addaddmul_1msb0)
    60  	push	%r12			C callee-saved, used as an accumulator limb
    61  	push	%rbp			C callee-saved, used as the bp pointer
    62  
    63  	lea	(ap,n,8), ap		C point ap, bp and rp just past their last limb
    64  	lea	(bp_param,n,8), bp	C bp moves out of %rdx here, before the first mul
    65  	lea	(rp,n,8), rp
    66  	neg	n			C n becomes a negative index from the end
    67  
    68  	mov	(ap,n,8), %rax		C first ap limb
    69  	mul	%r8			C %rdx:%rax = first ap limb * u0
    70  	mov	%rax, %r12		C low limb of the pending sum
    71  	mov	(bp,n,8), %rax		C first bp limb, multiplied by v0 later
    72  	mov	%rdx, %r10		C high limb of the pending sum
    73  	add	$3, n
    74  	jns	L(end)			C three limbs or fewer in total: tails only
    75  
    76  	ALIGN(16)
        C Main loop.  n has already been advanced by 3, so the three limbs
        C handled in this pass sit at displacements -24, -16 and -8 from
        C (ptr,n,8).
    77  L(top):	mul	%r9			C pending bp limb * v0
    78  	add	%rax, %r12		C complete result limb 0 of this pass
    79  	mov	-16(ap,n,8), %rax	C next ap limb
    80  	adc	%rdx, %r10		C carry and high product into the next limb
    81  	mov	%r12, -24(rp,n,8)	C store result limb 0
    82  	mul	%r8			C ap limb * u0
    83  	add	%rax, %r10
    84  	mov	-16(bp,n,8), %rax	C matching bp limb
    85  	mov	$0, R32(%r11)		C clear %r11 without disturbing CF
    86  	adc	%rdx, %r11		C %r11 = high product + carry
    87  	mul	%r9			C bp limb * v0
    88  	add	%rax, %r10		C complete result limb 1
    89  	mov	-8(ap,n,8), %rax	C next ap limb
    90  	adc	%rdx, %r11
    91  	mov	%r10, -16(rp,n,8)	C store result limb 1
    92  	mul	%r8			C ap limb * u0
    93  	add	%rax, %r11
    94  	mov	-8(bp,n,8), %rax	C matching bp limb
    95  	mov	$0, R32(%r12)		C clear %r12 without disturbing CF
    96  	adc	%rdx, %r12
    97  	mul	%r9			C bp limb * v0
    98  	add	%rax, %r11		C complete result limb 2
    99  	adc	%rdx, %r12
   100  	mov	(ap,n,8), %rax		C ap limb for the next pass or for the tail
   101  	mul	%r8			C start the next limb: ap limb * u0
   102  	add	%rax, %r12		C CF from this add is consumed by the adc below
   103  	mov	%r11, -8(rp,n,8)	C store result limb 2 (mov keeps CF)
   104  	mov	(bp,n,8), %rax		C pending bp limb for L(top) or L(end)
   105  	mov	$0, R32(%r10)		C clear %r10 without disturbing CF
   106  	adc	%rdx, %r10		C high limb of the pending sum
   107  	add	$3, n
   108  	js	L(top)			C repeat while the index is still negative
   109  
        C Here n is 0, 1 or 2, meaning 3, 2 or 1 limbs remain including the
        C pending one.  The pointers already point past the end, so the
        C tails use fixed displacements.
   110  L(end):	cmp	$1, R32(n)
   111  	ja	2f			C n = 2: one limb left
   112  	jz	1f			C n = 1: two limbs left
   113  
        C n = 0: three limbs left, same schedule as one loop pass without
        C starting a following limb.
   114  	mul	%r9			C pending bp limb * v0
   115  	add	%rax, %r12
   116  	mov	-16(ap), %rax
   117  	adc	%rdx, %r10
   118  	mov	%r12, -24(rp)		C third-from-last result limb
   119  	mul	%r8
   120  	add	%rax, %r10
   121  	mov	-16(bp), %rax
   122  	mov	$0, R32(%r11)		C clear %r11 without disturbing CF
   123  	adc	%rdx, %r11
   124  	mul	%r9
   125  	add	%rax, %r10
   126  	mov	-8(ap), %rax
   127  	adc	%rdx, %r11
   128  	mov	%r10, -16(rp)		C second-from-last result limb
   129  	mul	%r8
   130  	add	%rax, %r11
   131  	mov	-8(bp), %rax
   132  	mov	$0, R32(%r12)		C clear %r12 without disturbing CF
   133  	adc	%rdx, %r12
   134  	mul	%r9
   135  	add	%rax, %r11
   136  	adc	%rdx, %r12
   137  	mov	%r11, -8(rp)		C last result limb
   138  	mov	%r12, %rax		C return the limb above the stored result
   139  	pop	%rbp
   140  	pop	%r12
   141  	ret
   142  
        C n = 1: two limbs left.
   143  1:	mul	%r9			C pending bp limb * v0
   144  	add	%rax, %r12
   145  	mov	-8(ap), %rax
   146  	adc	%rdx, %r10
   147  	mov	%r12, -16(rp)		C second-from-last result limb
   148  	mul	%r8
   149  	add	%rax, %r10
   150  	mov	-8(bp), %rax
   151  	mov	$0, R32(%r11)		C clear %r11 without disturbing CF
   152  	adc	%rdx, %r11
   153  	mul	%r9
   154  	add	%rax, %r10
   155  	adc	%rdx, %r11
   156  	mov	%r10, -8(rp)		C last result limb
   157  	mov	%r11, %rax		C return the limb above the stored result
   158  	pop	%rbp
   159  	pop	%r12
   160  	ret
   161  
        C n = 2: only the pending limb is left.
   162  2:	mul	%r9			C pending bp limb * v0
   163  	add	%rax, %r12
   164  	mov	%r12, -8(rp)		C last result limb (mov keeps CF for the adc)
   165  	adc	%rdx, %r10
   166  	mov	%r10, %rax		C return the limb above the stored result
   167  	pop	%rbp
   168  	pop	%r12
   169  	ret
   170  EPILOGUE()