github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/k7/aorsmul_1.asm (about)

     1  dnl  AMD K7 mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple.
     2  
     3  dnl  Copyright 1999-2002, 2005, 2008 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  
    34  C			    cycles/limb
    35  C P5
    36  C P6 model 0-8,10-12
    37  C P6 model 9  (Banias)		 6.5
    38  C P6 model 13 (Dothan)
    39  C P4 model 0  (Willamette)
    40  C P4 model 1  (?)
    41  C P4 model 2  (Northwood)
    42  C P4 model 3  (Prescott)
    43  C P4 model 4  (Nocona)
    44  C AMD K6
    45  C AMD K7			 3.75
    46  C AMD K8
    47  
    48  C TODO
    49  C  * Improve feed-in and wind-down code.  We beat the old code for all n != 1,
    50  C    but lose by 2x for n == 1.
    51  
    52  ifdef(`OPERATION_addmul_1',`
    53        define(`ADDSUB',        `add')
    54        define(`func',  `mpn_addmul_1')
    55  ')	C addmul build: ADDSUB expands to add and func to mpn_addmul_1
    56  ifdef(`OPERATION_submul_1',`
    57        define(`ADDSUB',        `sub')
    58        define(`func',  `mpn_submul_1')
    59  ')	C submul build: ADDSUB expands to sub and func to mpn_submul_1; with neither OPERATION symbol defined both stay undefined
    60  C  The line below names both entry points this source can produce.  NOTE(review): presumably read by the build machinery to choose the OPERATION symbol -- confirm against config.m4.
    61  MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
    62  
    63  ASM_START()
        C  func (mpn_addmul_1 or mpn_submul_1, selected by the OPERATION ifdefs):
        C  multiply each of the n 32-bit source limbs by v and ADDSUB (add or
        C  sub) the running product into the destination limbs in place,
        C  propagating carry or borrow.  The final carry/borrow limb is
        C  returned in eax.
        C
        C  Stack arguments, in order: destination pointer, source pointer,
        C  limb count n, multiplier limb v.
        C  NOTE(review): n is presumably always >= 1.  With n == 0 the code
        C  branches to L(b0) with a zero counter and the decl at the loop
        C  bottom wraps instead of terminating -- confirm callers never pass 0.
        C
        C  Register roles after setup:
        C    edi       destination pointer, biased so each entry point lines up
        C    esi       source pointer, biased the same way
        C    ecx       multiplier v
        C    ebx, ebp  alternate: one holds the word waiting to be applied to
        C              memory, the other the high word of the previous product
        C    edx:eax   product from mul; eax is also the preloaded source limb
        C    28(%esp)  argument slot reused as the loop counter, n >> 2
        C
        C  Each loop stage does: mul the preloaded limb, ADDSUB the pending
        C  word into memory, then adc the new product into the other register
        C  pair.  Only mov and lea sit between ADDSUB and the adc pair, so the
        C  carry flag produced by ADDSUB survives; this is why registers are
        C  cleared with mov $0 and not xor.
    64  	TEXT
    65  	ALIGN(16)
    66  PROLOGUE(func)
    67  	add	$-16, %esp		C reserve 16 bytes for four saved registers
    68  	mov	%ebp, (%esp)		C save ebp
    69  	mov	%ebx, 4(%esp)		C save ebx
    70  	mov	%esi, 8(%esp)		C save esi
    71  	mov	%edi, 12(%esp)		C save edi
    72  	C  Arguments now start at 20(%esp): the 16-byte save area plus the 4-byte return address.
    73  	mov	20(%esp), %edi		C edi = destination pointer
    74  	mov	24(%esp), %esi		C esi = source pointer
    75  	mov	28(%esp), %eax		C eax = n
    76  	mov	32(%esp), %ecx		C ecx = multiplier v
    77  	mov	%eax, %ebx		C ebx = n, kept for the n mod 4 dispatch
    78  	shr	$2, %eax		C eax = n / 4
    79  	mov	%eax, 28(%esp)		C argument slot becomes the loop counter
    80  	mov	(%esi), %eax		C preload source limb 0 for the first mul
    81  	and	$3, %ebx		C ebx = n mod 4
    82  	jz	L(b0)			C n mod 4 == 0
    83  	cmp	$2, %ebx
    84  	jz	L(b2)			C n mod 4 == 2
    85  	jg	L(b3)			C n mod 4 == 3; fall through when it is 1
    86  	C  n mod 4 == 1: feed-in for loop entry L(1), or straight to L(cj1) when n == 1.
    87  L(b1):	lea	-4(%esi), %esi		C bias pointers so 4(%edi) is destination limb 0
    88  	lea	-4(%edi), %edi
    89  	mul	%ecx			C edx:eax = limb 0 * v
    90  	mov	%eax, %ebx		C ebx = low word, pending for 4(%edi)
    91  	mov	%edx, %ebp		C ebp = high word
    92  	cmpl	$0, 28(%esp)
    93  	jz	L(cj1)			C n == 1: edx still holds the high word for the tail
    94  	mov	8(%esi), %eax		C preload source limb 1
    95  	jmp	L(1)
    96  	C  n mod 4 == 2: no pointer bias needed, L(2) and L(cj2) use offset 0.
    97  L(b2):	mul	%ecx			C edx:eax = limb 0 * v
    98  	mov	%eax, %ebp		C ebp = low word, pending for 0(%edi)
    99  	mov	4(%esi), %eax		C preload source limb 1
   100  	mov	%edx, %ebx		C ebx = high word
   101  	cmpl	$0, 28(%esp)
   102  	jne	L(2)			C counter nonzero (n >= 6): enter the loop
   103  	jmp	L(cj2)			C n == 2: go directly to the wind-down
   104  	C  n mod 4 == 3: feed-in for loop entry L(3).
   105  L(b3):	lea	-12(%esi), %esi		C bias pointers so 12(%edi) is destination limb 0
   106  	lea	-12(%edi), %edi
   107  	mul	%ecx			C edx:eax = limb 0 * v
   108  	mov	%eax, %ebx		C ebx = low word, pending for 12(%edi)
   109  	mov	%edx, %ebp		C ebp = high word
   110  	mov	16(%esi), %eax		C preload source limb 1
   111  	incl	28(%esp)		C entry at L(3) hits the decl after one stage, so count that partial pass
   112  	jmp	L(3)
   113  	C  n mod 4 == 0: feed-in for loop entry L(0).
   114  L(b0):	lea	-8(%esi), %esi		C bias pointers so 8(%edi) is destination limb 0
   115  	lea	-8(%edi), %edi
   116  	mul	%ecx			C edx:eax = limb 0 * v
   117  	mov	%eax, %ebp		C ebp = low word, pending for 8(%edi)
   118  	mov	12(%esi), %eax		C preload source limb 1
   119  	mov	%edx, %ebx		C ebx = high word
   120  	jmp	L(0)
   121  	C  Main loop, four limbs per pass; L(2), L(1), L(0), L(3) are the four entry points.
   122  	ALIGN(16)
   123  L(top):	lea	16(%edi), %edi		C advance destination by four limbs
   124  L(2):	mul	%ecx			C edx:eax = preloaded limb * v
   125  	ADDSUB	%ebp, 0(%edi)		C apply pending word; CF = carry or borrow
   126  	mov	$0, %ebp		C clear with mov so CF survives
   127  	adc	%eax, %ebx		C ebx = previous high + new low + CF
   128  	mov	8(%esi), %eax		C preload next source limb
   129  	adc	%edx, %ebp		C ebp = new high + CF
   130  L(1):	mul	%ecx			C same stage with ebx and ebp swapped
   131  	ADDSUB	%ebx, 4(%edi)
   132  	mov	$0, %ebx
   133  	adc	%eax, %ebp
   134  	mov	12(%esi), %eax		C preload next source limb
   135  	adc	%edx, %ebx
   136  L(0):	mul	%ecx
   137  	ADDSUB	%ebp, 8(%edi)
   138  	mov	$0, %ebp
   139  	adc	%eax, %ebx
   140  	adc	%edx, %ebp
   141  	mov	16(%esi), %eax		C preload next source limb
   142  L(3):	mul	%ecx
   143  	ADDSUB	%ebx, 12(%edi)
   144  	adc	%eax, %ebp
   145  	mov	20(%esi), %eax		C preload next source limb, using the offset before esi advances
   146  	lea	16(%esi), %esi		C advance source by four limbs; lea leaves CF alone
   147  	mov	$0, %ebx
   148  	adc	%edx, %ebx
   149  	decl	28(%esp)		C one four-limb pass done
   150  	jnz	L(top)
   151  	C  Wind-down: the last two limbs, entered with the last source limb already in eax.
   152  L(end):	lea	16(%edi), %edi		C advance so 0(%edi) and 4(%edi) are the last two destination limbs
   153  L(cj2):	mul	%ecx			C edx:eax = last source limb * v
   154  	ADDSUB	%ebp, (%edi)		C apply pending word to the second-to-last limb
   155  	adc	%eax, %ebx		C ebx = previous high + last low + CF
   156  	adc	$0, %edx		C edx = last high + CF
   157  L(cj1):	ADDSUB	%ebx, 4(%edi)		C apply the final word to the last destination limb
   158  	adc	$0, %edx		C fold the final carry or borrow into the return limb
   159  	mov	%edx, %eax		C return value in eax
   160  	mov	(%esp), %ebp		C restore the saved registers
   161  	mov	4(%esp), %ebx
   162  	mov	8(%esp), %esi
   163  	mov	12(%esp), %edi
   164  	add	$16, %esp		C release the save area
   165  	ret
   166  EPILOGUE()
   167  ASM_END()