github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/arm/dive_1.asm (about)

     1  dnl  ARM v4 mpn_divexact_1
     2  
     3  dnl  Contributed to the GNU project by Torbjorn Granlund.
     4  
     5  dnl  Copyright 2012 Free Software Foundation, Inc.
     6  
     7  dnl  This file is part of the GNU MP Library.
     8  dnl
     9  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
    10  dnl  it under the terms of either:
    11  dnl
    12  dnl    * the GNU Lesser General Public License as published by the Free
    13  dnl      Software Foundation; either version 3 of the License, or (at your
    14  dnl      option) any later version.
    15  dnl
    16  dnl  or
    17  dnl
    18  dnl    * the GNU General Public License as published by the Free Software
    19  dnl      Foundation; either version 2 of the License, or (at your option) any
    20  dnl      later version.
    21  dnl
    22  dnl  or both in parallel, as here.
    23  dnl
    24  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    25  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    26  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    27  dnl  for more details.
    28  dnl
    29  dnl  You should have received copies of the GNU General Public License and the
    30  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    31  dnl  see https://www.gnu.org/licenses/.
    32  
    33  include(`../config.m4')
    34  
    35  C               cycles/limb
    36  C               norm    unorm
    37  C StrongARM	 ?
    38  C XScale	 ?
    39  C Cortex-A7	 ?
    40  C Cortex-A8	 ?
    41  C Cortex-A9	10	12
    42  C Cortex-A15	 9	 9
    43  
    44  C Architecture requirements:
    45  C v5	-
    46  C v5t	-
    47  C v5te	-
    48  C v6	-
    49  C v6t2	-
    50  C v7a	-
    51  
    52  define(`rp', `r0')	C pointer the result limbs are stored through
    53  define(`up', `r1')	C pointer the source limbs are loaded from
    54  define(`n',  `r2')	C count of limbs still to process
    55  define(`d',  `r3')	C divisor limb, shifted down to its odd part when even
    56  
    57  define(`cy', `r7')	C high word of d * (previous result limb), subtracted from the next limb
    58  define(`cnt', `r6')	C number of trailing zero bits stripped from d
    59  define(`tnc', `r8')	C 32 - cnt, the complementary shift count
    60  
    61  ASM_START()
        C mpn_divexact_1 -- register arguments rp, up, n, d (r0-r3, see the defines).
        C
        C Outline, as implemented below:
        C   1. If d is even, strip its trailing zero bits; cnt holds how many.
        C   2. Compute in r4 a multiplicative inverse of the (now odd) d, starting
        C      from a byte read from binvert_limb_table and refining it twice.
        C   3. For each source limb, least significant first: subtract the high
        C      word carried from the previous step (plus borrow), multiply the
        C      result by the inverse keeping the low 32 bits, store that as the
        C      result limb, and take the high word of d * result limb as the next
        C      value to subtract.
        C      When cnt != 0 the source limbs are shifted right by cnt bits on the
        C      fly (L(unnorm) path); otherwise they are used as loaded (L(norm)).
        C
        C Exactly n limbs are read from up and n limbs are stored to rp.
        C NOTE(review): n is decremented before it is first tested, so n >= 1 is
        C assumed; d != 0 and exact divisibility are presumably caller guarantees,
        C nothing here checks them.
        C
        C r4-r9 are saved on entry and restored before returning; r12 is used as
        C scratch without being saved.
    62  PROLOGUE(mpn_divexact_1)
    63  	tst	d, #1			C Z clear when d is already odd
    64  	push	{r4-r9}			C neither push nor mov changes the flags
    65  	mov	cnt, #0
    66  	bne	L(inv)			C odd d: nothing to strip, cnt stays 0
    67  
    68  C count trailing zeros
    69  	movs	r4, d, lsl #16		C Z set when the low 16 bits of d are zero
    70  	moveq	d, d, lsr #16
    71  	moveq	cnt, #16
    72  	tst	d, #0xff		C Z set when the low byte of d is zero
    73  	moveq	d, d, lsr #8
    74  	addeq	cnt, cnt, #8
    75  	LEA(	r4, ctz_tab)
    76  	and	r5, d, #0xff		C table index = low byte of d
    77  	ldrb	r4, [r4, r5]		C trailing zero count of that byte
    78  	mov	d, d, lsr r4		C d is now odd (for d != 0)
    79  	add	cnt, cnt, r4		C cnt = total number of bits stripped
    80  
    81  C binvert limb
        C The table is indexed by bits 1..7 of d and yields a byte-sized starting
        C value (presumably the inverse of d mod 2^8; the table is defined
        C elsewhere).  Each refinement below computes x = 2*x - d*x*x, which
        C doubles the number of correct low bits of an inverse.
    82  L(inv):	LEA(	r4, binvert_limb_table)
    83  	and	r12, d, #254
    84  	ldrb	r4, [r4, r12, lsr #1]
    85  	mul	r12, r4, r4		C x*x
    86  	mul	r12, d, r12		C d*x*x
    87  	rsb	r12, r12, r4, lsl #1	C first refinement: r12 = 2*x - d*x*x
    88  	mul	r4, r12, r12
    89  	mul	r4, d, r4
    90  	rsb	r4, r4, r12, lsl #1	C r4 = inverse
    91  
    92  	tst	cnt, cnt		C flags survive the ldr and mov below
    93  	ldr	r5, [up], #4		C up[0]
    94  	mov	cy, #0
    95  	bne	L(unnorm)		C d was even: take the shifting path
    96  
    97  L(norm):
    98  	subs	n, n, #1		C set carry as side-effect
    99  	beq	L(end)			C single limb: only the tail step is needed
   100  
   101  	ALIGN(16)
        C Loop invariant: r5 = current source limb, cy = high word to subtract,
        C carry flag clear means a borrow is pending from the previous sbcs.
   102  L(top):	sbcs	cy, r5, cy		C limb - cy - borrow; new borrow in carry flag
   103  	ldr	r5, [up], #4		C fetch the next source limb
   104  	sub	n, n, #1
   105  	mul	r9, r4, cy		C result limb = low 32 bits of inverse * cy
   106  	tst	n, n			C sets Z for the branch, leaves carry untouched
   107  	umull	r12, cy, d, r9		C cy = high word of d * result; low word unused
   108  	str	r9, [rp], #4
   109  	bne	L(top)
   110  
   111  L(end):	sbc	cy, r5, cy		C last limb: flags are no longer needed
   112  	mul	r9, r4, cy
   113  	str	r9, [rp]
   114  	pop	{r4-r9}
   115  	ret	r14
   116  
   117  L(unnorm):
   118  	rsb	tnc, cnt, #32		C tnc = 32 - cnt
   119  	mov	r5, r5, lsr cnt		C low part of the first shifted limb
   120  	subs	n, n, #1		C set carry as side-effect
   121  	beq	L(edu)
   122  
   123  	ALIGN(16)
        C Same recurrence as L(top), but each source limb is first assembled from
        C two adjacent loaded limbs so that the operand is shifted right by cnt.
   124  L(tpu):	ldr	r12, [up], #4		C next higher source limb
   125  	orr	r9, r5, r12, lsl tnc	C its low cnt bits fill the top of the current limb
   126  	mov	r5, r12, lsr cnt	C remaining bits start the following limb
   127  	sbcs	cy, r9, cy		C critical path ->cy->cy->
   128  	sub	n, n, #1
   129  	mul	r9, r4, cy		C critical path ->cy->r9->
   130  	tst	n, n			C sets Z for the branch, leaves carry untouched
   131  	umull	r12, cy, d, r9		C critical path ->r9->cy->
   132  	str	r9, [rp], #4
   133  	bne	L(tpu)
   134  
   135  L(edu):	sbc	cy, r5, cy		C last limb: r5 holds the top limb shifted right by cnt
   136  	mul	r9, r4, cy
   137  	str	r9, [rp]
   138  	pop	{r4-r9}
   139  	ret	r14
   140  EPILOGUE()
   141  
   142  	RODATA
        C ctz_tab: 256 one-byte entries, 32 per row.  Entry b is the number of
        C trailing zero bits in the 8-bit value b; entry 0 holds 8.
        C mpn_divexact_1 indexes it with the low byte of d.
   143  ctz_tab:
   144  	.byte	8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0	C 0x00-0x1f
   145  	.byte	5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0	C 0x20-0x3f
   146  	.byte	6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0	C 0x40-0x5f
   147  	.byte	5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0	C 0x60-0x7f
   148  	.byte	7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0	C 0x80-0x9f
   149  	.byte	5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0	C 0xa0-0xbf
   150  	.byte	6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0	C 0xc0-0xdf
   151  	.byte	5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0	C 0xe0-0xff