github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/arm/aorslsh1_n.asm (about)

     1  dnl  ARM mpn_addlsh1_n and mpn_sublsh1_n
     2  
     3  dnl  Contributed to the GNU project by Torbjörn Granlund.
     4  
     5  dnl  Copyright 2012 Free Software Foundation, Inc.
     6  
     7  dnl  This file is part of the GNU MP Library.
     8  dnl
     9  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
    10  dnl  it under the terms of either:
    11  dnl
    12  dnl    * the GNU Lesser General Public License as published by the Free
    13  dnl      Software Foundation; either version 3 of the License, or (at your
    14  dnl      option) any later version.
    15  dnl
    16  dnl  or
    17  dnl
    18  dnl    * the GNU General Public License as published by the Free Software
    19  dnl      Foundation; either version 2 of the License, or (at your option) any
    20  dnl      later version.
    21  dnl
    22  dnl  or both in parallel, as here.
    23  dnl
    24  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    25  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    26  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    27  dnl  for more details.
    28  dnl
    29  dnl  You should have received copies of the GNU General Public License and the
    30  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    31  dnl  see https://www.gnu.org/licenses/.
    32  
    33  include(`../config.m4')
    34  
    35  C	      addlsh1_n       sublsh1_n
    36  C	     cycles/limb     cycles/limb
    37  C StrongARM	 ?		 ?
    38  C XScale	 ?		 ?
    39  C Cortex-A7	 ?		 ?
    40  C Cortex-A8	 ?		 ?
    41  C Cortex-A9	 3.12		 3.7
    42  C Cortex-A15	 ?		 ?
    43  
    44  C TODO
    45  C  * The addlsh1_n code runs well, but is only barely faster than mpn_addmul_1.
    46  C    The sublsh1_n code could surely be tweaked, its REVCY slows down things
    47  C    very much.  If two insns are really needed, it might help to separate them
    48  C    for better micro-parallelism.
    49  
    50  define(`rp', `r0')		C destination pointer, advanced by stmia rp! in the main loop
    51  define(`up', `r1')		C first source pointer, its limbs are loaded into r4-r6
    52  define(`vp', `r2')		C second source pointer, its limbs are loaded into r8-r10 and doubled
    53  define(`n',  `r3')		C limb count, reduced by 3 for every unrolled block
    54  
    55  ifdef(`OPERATION_addlsh1_n', `
          C Instruction fragments for the addlsh1 variant.  Keep quote characters
          C out of these comments, they sit inside a quoted m4 argument.
          C
          C   ADDSUBC  adcs: add with carry-in, sets the flags.
          C   SAVECY   first arg = second arg - 1 + carry flag, flags untouched.
          C            The routine passes r11 = -1 as second arg, so the result
          C            is -1 when the carry is set and -2 when it is clear.
          C   RESTCY   cmn against 1: carry flag becomes set exactly when the
          C            argument is -1.  The INICYR value 0 yields carry clear.
          C   RETVAL   r0 = argument + 2 + carry flag.  With a SAVECY value as
          C            argument the +2 removes the -2 bias, giving saved carry
          C            plus current carry.
          C   REVCY    empty, an addition carry needs no inversion.
          C   INICYR   argument = 0.
          C   r10r11   last register of the push/pop range, r11 here because
          C            r11 holds the constant -1.
          C   func     symbol name handed to PROLOGUE.
          C
          C ADDSUB, SETCY and func_nc are defined but not referenced by the
          C routine below.
    56    define(`ADDSUB',	adds)
    57    define(`ADDSUBC',	adcs)
    58    define(`SETCY',	`cmp	$1, #1')
    59    define(`RETVAL',	`adc	r0, $1, #2')
    60    define(`SAVECY',	`sbc	$1, $2, #0')
    61    define(`RESTCY',	`cmn	$1, #1')
    62    define(`REVCY',	`')
    63    define(`INICYR',	`mov	$1, #0')
    64    define(`r10r11',	`r11')
    65    define(`func',	mpn_addlsh1_n)
    66    define(`func_nc',	mpn_addlsh1_nc)')
    67  ifdef(`OPERATION_sublsh1_n', `
          C Instruction fragments for the sublsh1 variant.  Keep quote characters
          C out of these comments, they sit inside a quoted m4 argument.
          C
          C   ADDSUBC  sbcs: subtract with borrow, sets the flags.  On ARM the
          C            borrow is the complement of the carry flag.
          C   SAVECY   first arg = carry flag - 1, flags untouched: 0 when the
          C            carry is set, -1 when it is clear.  The second argument
          C            is ignored in this variant.
          C   RESTCY   cmn against 1: carry flag becomes set exactly when the
          C            argument is -1.  Combined with SAVECY this hands the sbcs
          C            chain a borrow exactly when the saved carry was set.
          C   REVCY    same two steps back to back: the carry flag ends up as
          C            the complement of its previous value, so it is set when
          C            the preceding sbcs chain borrowed.  Overwrites its
          C            argument register.
          C   RETVAL   r0 = argument + 1 + carry flag.  With a SAVECY value as
          C            argument this is saved carry plus current carry.
          C   INICYR   argument = -1, which RESTCY turns into carry set, that is
          C            no borrow.
          C   r10r11   last register of the push/pop range, r10 here because
          C            r11 is not used by this variant.
          C   func     symbol name handed to PROLOGUE.
          C
          C ADDSUB, SETCY and func_nc are defined but not referenced by the
          C routine below.
    68    define(`ADDSUB',	subs)
    69    define(`ADDSUBC',	sbcs)
    70    define(`SETCY',	`rsbs	$1, $1, #0')
    71    define(`RETVAL',	`adc	r0, $1, #1')
    72    define(`SAVECY',	`sbc	$1, $1, $1')
    73    define(`RESTCY',	`cmn	$1, #1')
    74    define(`REVCY',	`sbc	$1, $1, $1
    75  			cmn	$1, #1')
    76    define(`INICYR',	`mvn	$1, #0')
    77    define(`r10r11',	`r10')
    78    define(`func',	mpn_sublsh1_n)
    79    define(`func_nc',	mpn_sublsh1_nc)')
    80  
    81  MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
    82  
    83  ASM_START()
        C func (rp, up, vp, n), assembled as mpn_addlsh1_n or mpn_sublsh1_n
        C depending on which OPERATION_ symbol is defined.
        C
        C For each of the n limbs, the vp limb is doubled through an adcs chain
        C (a one bit left shift running across limbs) and then added to, or
        C subtracted from, the matching up limb by ADDSUBC.  Results are stored
        C at rp.  r0 returns the bit shifted out of the last vp limb plus the
        C final carry (addlsh1) or borrow (sublsh1), see RETVAL.
        C
        C Register use:
        C   r4-r6	up limbs, overwritten with the results
        C   r8-r10	vp limbs, doubled in place
        C   r11		constant -1 consumed by SAVECY (addlsh1 only)
        C   r12, r14	two alternating slots for the saved shift carry
        C   r7		scratch for swapping r12 and r14 after the loop
        C
        C Carry scheme: the carry leaving an ADDSUBC chain (after REVCY for
        C sublsh1) is fed straight into bit 0 of the next doubling chain.  The
        C bit shifted out of a doubling chain is parked by SAVECY and brought
        C back by RESTCY as carry-in of the ADDSUBC chain of the following
        C block.  Both carries have the weight of the lowest bit of that block,
        C so swapping their entry points leaves the result unchanged and only
        C one of the two has to be saved.  A saved value is consumed one block
        C after it was produced, hence the two slots.
        C
        C NOTE(review): with n = 0 the addlsh1 variant would reach RETVAL with
        C r14 still holding the INICYR value 0 and return 2.  Presumably n >= 1
        C is a precondition, confirm against the callers.
    84  PROLOGUE(func)
    85  	push	{r4-r10r11, r14}	C save r4 up to r10 or r11 (per variant) and r14
    86  
    87  ifdef(`OPERATION_addlsh1_n', `
    88  	mvn	r11, #0			C r11 = -1, the minuend used by SAVECY
    89  ')
    90  	INICYR(	r14)			C slot value that RESTCY reads as no carry/borrow in
    91  	subs	n, n, #3		C negative: fewer than 3 limbs in total
    92  	blt	L(le2)			C carry clear on branch path
    93  
    94  	cmn	r0, #0			C clear carry
    95  	ldmia	vp!, {r8, r9, r10}	C first block of vp limbs
    96  	b	L(mid)			C enter at the doubling step of the second half
    97  
        C Main loop.  Each half handles one 3-limb block: finish the pending
        C ADDSUBC for the previous block, then double the next vp block.  The
        C halves differ only in the save slot, r14 in the first and r12 in the
        C second.  The ldmia/stmia in between leave the carry flag alone.
    98  L(top):	RESTCY(	r14)			C carry-in from the r14 slot
    99  	ADDSUBC	r4, r4, r8		C up limb +/- doubled vp limb, carry chained
   100  	ADDSUBC	r5, r5, r9
   101  	ADDSUBC	r6, r6, r10
   102  	ldmia	vp!, {r8, r9, r10}	C next block of vp limbs
   103  	stmia	rp!, {r4, r5, r6}	C store finished block
   104  	REVCY(r14)			C sublsh1 only: flip carry, r14 is free to clobber here
   105  	adcs	r8, r8, r8		C r8 = 2*r8 + carry, top bit moves to carry
   106  	adcs	r9, r9, r9
   107  	adcs	r10, r10, r10
   108  	ldmia	up!, {r4, r5, r6}	C matching block of up limbs
   109  	SAVECY(	r14, r11)		C park the bit shifted out of r10 in r14
   110  	subs	n, n, #3
   111  	blt	L(exi)			C pending block is the last full one
   112  	RESTCY(	r12)			C carry-in from the r12 slot
   113  	ADDSUBC	r4, r4, r8
   114  	ADDSUBC	r5, r5, r9
   115  	ADDSUBC	r6, r6, r10
   116  	ldmia	vp!, {r8, r9, r10}
   117  	stmia	rp!, {r4, r5, r6}
   118  	REVCY(r12)			C sublsh1 only: flip carry, r12 is free to clobber here
   119  L(mid):	adcs	r8, r8, r8
   120  	adcs	r9, r9, r9
   121  	adcs	r10, r10, r10
   122  	ldmia	up!, {r4, r5, r6}
   123  	SAVECY(	r12, r11)		C park the bit shifted out of r10 in r12
   124  	subs	n, n, #3
   125  	bge	L(top)
   126  
        C Left the loop from its second half, where the slot roles are the
        C opposite of what L(exi) expects, so exchange them.  mov does not
        C touch the flags, and RESTCY below sets the carry anyway.
   127  	mov	r7, r12			C swap alternating...
   128  	mov	r12, r14		C ...carry-save...
   129  	mov	r14, r7			C ...registers
   130  
        C Finish the pending block.  Here r12 holds its carry-in and r14 the
        C bit shifted out of its vp limbs.
   131  L(exi):	RESTCY(	r12)
   132  	ADDSUBC	r4, r4, r8
   133  	ADDSUBC	r5, r5, r9
   134  	ADDSUBC	r6, r6, r10
   135  	stmia	rp!, {r4, r5, r6}
   136  
   137  	REVCY(r12)			C sublsh1 only: carry set when the block borrowed
        C Tail of 0, 1 or 2 limbs.  The carry flag is the bit to feed into the
        C doubling step, r14 the saved shift carry (or the INICYR value when
        C arriving from the entry).  The code relies on tst leaving the carry
        C flag untouched.
   138  L(le2):	tst	n, #1			C n = {-1,-2,-3} map to [2], [1], [0]
   139  	beq	L(e1)			C bit 0 clear: n = -2, one limb left
   140  
   141  L(e02):	tst	n, #2
   142  	beq	L(rt0)			C bit 1 clear: n = -3, nothing left
   143  	ldm	vp, {r8, r9}		C n = -1, two limbs left
   144  	adcs	r8, r8, r8
   145  	adcs	r9, r9, r9
   146  	ldm	up, {r4, r5}
   147  	SAVECY(	r12, r11)		C bit shifted out of the last vp limb
   148  	RESTCY(	r14)			C carry-in saved by the previous block
   149  	ADDSUBC	r4, r4, r8
   150  	ADDSUBC	r5, r5, r9
   151  	stm	rp, {r4, r5}
   152  	b	L(rt1)
   153  
   154  L(e1):	ldr	r8, [vp]		C single remaining limb
   155  	adcs	r8, r8, r8
   156  	ldr	r4, [up]
   157  	SAVECY(	r12, r11)		C bit shifted out of the last vp limb
   158  	RESTCY(	r14)			C carry-in saved by the previous block
   159  	ADDSUBC	r4, r4, r8
   160  	str	r4, [rp]
   161  
   162  L(rt1):	mov	r14, r12		C RETVAL reads the final shift carry from r14
   163  	REVCY(r12)			C sublsh1 only: carry set when the tail borrowed
   164  L(rt0):	RETVAL(	r14)			C r0 = decoded shift carry + carry flag
   165  	pop	{r4-r10r11, r14}	C restore saved registers, r14 gets the return address back
   166  	ret	r14
   167  EPILOGUE()