github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/bobcat/aorsmul_1.asm (about)

     1  dnl  AMD64 mpn_addmul_1 and mpn_submul_1 optimised for AMD bobcat.
     2  
     3  dnl  Copyright 2003-2005, 2007, 2008, 2011, 2012 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  C	     cycles/limb
    34  C AMD K8,K9	 4.5
    35  C AMD K10	 4.5
    36  C AMD bd1	 4.75
    37  C AMD bobcat	 5
    38  C Intel P4	17.7
    39  C Intel core2	 5.5
    40  C Intel NHM	 5.43
    41  C Intel SBR	 3.92
    42  C Intel atom	23
    43  C VIA nano	 5.63
    44  
    45  ABI_SUPPORT(DOS64)
    46  ABI_SUPPORT(STD64)
    47  
        C One source serves two entry points.  When OPERATION_addmul_1 is
        C defined, ADDSUB is add and func is mpn_addmul_1.  When
        C OPERATION_submul_1 is defined, ADDSUB is sub and func is mpn_submul_1.
        C If neither symbol is defined, ADDSUB and func are left undefined.
    48  ifdef(`OPERATION_addmul_1',`
    49        define(`ADDSUB',        `add')
    50        define(`func',  `mpn_addmul_1')
    51  ')
    52  ifdef(`OPERATION_submul_1',`
    53        define(`ADDSUB',        `sub')
    54        define(`func',  `mpn_submul_1')
    55  ')
    56  
    57  MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
    58  
    59  C Standard parameters
    60  define(`rp',              `%rdi')	C arg 1: limbs updated in place
    61  define(`up',              `%rsi')	C arg 2: limbs multiplied by v0
    62  define(`n_param',         `%rdx')	C arg 3: limb count; rdx is overwritten by mul, so it is copied to n first
    63  define(`v0',              `%rcx')	C arg 4: multiplier limb
    64  C Standard allocations
    65  define(`n',               `%rbx')	C copy of the count, then the negative loop index
    66  define(`w0',              `%r8')	C w1:w0 and w3:w2 alternately hold the
    67  define(`w1',              `%r9')	C product whose low limb is still to be
    68  define(`w2',              `%r10')	C applied to rp and whose high limb is
    69  define(`w3',              `%r11')	C still to be added to the next product
    70  
    71  C DOS64 parameters
    72  IFDOS(` define(`rp',      `%rcx')    ') dnl  arg 1
    73  IFDOS(` define(`up',      `%rsi')    ') dnl  arg 2 arrives in rdx; the function prologue copies it to rsi
    74  IFDOS(` define(`n_param', `%r8')     ') dnl  arg 3
    75  IFDOS(` define(`v0',      `%r9')     ') dnl  arg 4
    76  C DOS64 allocations
    77  IFDOS(` define(`n',       `%rbx')    ') dnl  same as the standard allocation
    78  IFDOS(` define(`w0',      `%r8')     ') dnl  shares r8 with n_param, which is last read before w0 is first written
    79  IFDOS(` define(`w1',      `%rdi')    ') dnl  r9 holds v0 under DOS64, so w1 moves to rdi
    80  IFDOS(` define(`w2',      `%r10')    ') dnl  same as the standard allocation
    81  IFDOS(` define(`w3',      `%r11')    ') dnl  same as the standard allocation
    82  
    83  ASM_START()
        C func(rp, up, n_param, v0), where func is mpn_addmul_1 or mpn_submul_1.
        C
        C Multiplies each of the n_param limbs at up by v0 and applies the
        C products to the limbs at rp in place with ADDSUB (add or sub).  The
        C limb carried or borrowed out of the top is returned in rax.
        C
        C The first limb is loaded and multiplied before the count is examined
        C and there is no test for zero, so the count is assumed to be at
        C least 1.
        C
        C Layout: n_param mod 4 picks one of the feed-in blocks b0..b3.  Each
        C does the first multiply, sets n to a negative index and jumps into
        C the 4-way unrolled loop at L0..L3.  rp and up are biased by
        C 8*count - 16, so that (rp) and 8(rp) are the last two limbs and
        C n reaches 2 when the loop falls out.
        C
        C rbx is callee-saved and used for n, so it is pushed.  Under DOS64
        C rsi and rdi are pushed too, since they serve as up and w1 there.
    84  	TEXT
    85  	ALIGN(16)
    86  PROLOGUE(func)
    87  IFDOS(`	push	%rsi		')
    88  IFDOS(`	push	%rdi		')
    89  IFDOS(`	mov	%rdx, %rsi	')
    90  
    91  	push	%rbx
    92  	mov	(up), %rax	C rax = first source limb, read before up is biased
    93  
    94  	lea	-16(rp,n_param,8), rp	C rp += 8*count - 16
    95  	lea	-16(up,n_param,8), up	C up += 8*count - 16
    96  
    97  	mov	n_param, n	C keep the full count; n_param is reduced next
    98  	and	$3, R32(n_param)	C n_param = count mod 4
    99  	jz	L(b0)	C count mod 4 = 0
   100  	cmp	$2, R32(n_param)
   101  	ja	L(b3)	C count mod 4 = 3
   102  	jz	L(b2)	C count mod 4 = 2
   103  
        C count mod 4 = 1 falls through to here.
   104  L(b1):	mul	v0	C rdx:rax = first limb * v0
   105  	cmp	$1, n	C a single limb cannot enter the loop, which always
   106  	jz	L(n1)	C multiplies further limbs, so it is finished at n1
   107  	mov	%rax, w2	C w3:w2 = product in flight
   108  	mov	%rdx, w3
   109  	neg	n
   110  	add	$3, n	C n = 3 - count
   111  	jmp	L(L1)
   112  L(n1):	ADDSUB	%rax, 8(rp)	C with count = 1 the bias is -8, so 8(rp) is the only limb
   113  	adc	$0, %rdx	C fold the carry or borrow into the high limb
   114  	mov	%rdx, %rax	C return it
   115  	pop	%rbx
   116  IFDOS(`	pop	%rdi		')
   117  IFDOS(`	pop	%rsi		')
   118  	ret
   119  
   120  L(b3):	mul	v0	C rdx:rax = first limb * v0
   121  	mov	%rax, w2	C w3:w2 = product in flight
   122  	mov	%rdx, w3
   123  	neg	n
   124  	inc	n	C n = 1 - count
   125  	jmp	L(L3)
   126  
   127  L(b0):	mul	v0	C rdx:rax = first limb * v0
   128  	mov	%rax, w0	C w1:w0 = product in flight
   129  	mov	%rdx, w1
   130  	neg	n
   131  	add	$2, n	C n = 2 - count
   132  	jmp	L(L0)
   133  
   134  L(b2):	mul	v0	C rdx:rax = first limb * v0
   135  	mov	%rax, w0	C w1:w0 = product in flight
   136  	mov	%rdx, w1
   137  	neg	n	C n = -count
   138  	jmp	L(L2)
   139  
        C Main loop, four limbs per pass.  Each stage loads a source limb,
        C multiplies it, applies the low limb of the previous product to rp,
        C and folds the previous high limb plus the carry or borrow into the
        C new product.  w1:w0 and w3:w2 swap roles from stage to stage.
        C The stage begun at L2 is split around the branch and completes at
        C top, after n has advanced by 4, hence the -16 displacement there.
   140  	ALIGN(16)
   141  L(top):	ADDSUB	w0, -16(rp,n,8)	C apply the previous low limb
   142  	adc	w1, w2	C new low += previous high + carry or borrow
   143  	adc	$0, w3	C propagate into the new high limb
   144  L(L1):	mov	0(up,n,8), %rax	C next source limb
   145  	mul	v0
   146  	mov	%rax, w0
   147  	mov	%rdx, w1
   148  	ADDSUB	w2, -8(rp,n,8)
   149  	adc	w3, w0
   150  	adc	$0, w1
   151  L(L0):	mov	8(up,n,8), %rax	C next source limb
   152  	mul	v0
   153  	mov	%rax, w2
   154  	mov	%rdx, w3
   155  	ADDSUB	w0, 0(rp,n,8)
   156  	adc	w1, w2
   157  	adc	$0, w3
   158  L(L3):	mov	16(up,n,8), %rax	C next source limb
   159  	mul	v0
   160  	mov	%rax, w0
   161  	mov	%rdx, w1
   162  	ADDSUB	w2, 8(rp,n,8)
   163  	adc	w3, w0
   164  	adc	$0, w1
   165  L(L2):	mov	24(up,n,8), %rax	C next source limb
   166  	mul	v0
   167  	mov	%rax, w2
   168  	mov	%rdx, w3
   169  	add	$4, n	C advance the index by one pass
   170  	js	L(top)	C loop while the index is still negative
   171  
        C Wind-down: w1:w0 and w3:w2 both still hold unapplied products.
        C n is 2 here, so (rp) is the slot top would address as -16(rp,n,8).
   172  L(end):	ADDSUB	w0, (rp)	C second-to-last limb
   173  	adc	w1, w2
   174  	adc	$0, w3
   175  	ADDSUB	w2, 8(rp)	C last limb
   176  	adc	$0, w3	C w3 = limb carried or borrowed out
   177  	mov	w3, %rax	C return it
   178  
   179  	pop	%rbx
   180  IFDOS(`	pop	%rdi		')
   181  IFDOS(`	pop	%rsi		')
   182  	ret
   183  EPILOGUE()