github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/arm/v6/popham.asm (about)

     1  dnl  ARM mpn_popcount and mpn_hamdist.
     2  
     3  dnl  Contributed to the GNU project by Torbjörn Granlund.
     4  
     5  dnl  Copyright 2013 Free Software Foundation, Inc.
     6  
     7  dnl  This file is part of the GNU MP Library.
     8  dnl
     9  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
    10  dnl  it under the terms of either:
    11  dnl
    12  dnl    * the GNU Lesser General Public License as published by the Free
    13  dnl      Software Foundation; either version 3 of the License, or (at your
    14  dnl      option) any later version.
    15  dnl
    16  dnl  or
    17  dnl
    18  dnl    * the GNU General Public License as published by the Free Software
    19  dnl      Foundation; either version 2 of the License, or (at your option) any
    20  dnl      later version.
    21  dnl
    22  dnl  or both in parallel, as here.
    23  dnl
    24  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    25  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    26  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    27  dnl  for more details.
    28  dnl
    29  dnl  You should have received copies of the GNU General Public License and the
    30  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    31  dnl  see https://www.gnu.org/licenses/.
    32  
    33  include(`../config.m4')
    34  
    35  C		     popcount	      hamdist
    36  C		    cycles/limb	    cycles/limb
    37  C StrongARM		 -
    38  C XScale		 -
    39  C Cortex-A7		 ?
    40  C Cortex-A8		 ?
    41  C Cortex-A9		 8.94		 9.47
    42  C Cortex-A15		 5.67		 6.44
    43  
    44  C Architecture requirements:
    45  C v5	-
    46  C v5t	-
    47  C v5te	ldrd strd
    48  C v6	usada8
    49  C v6t2	-
    50  C v7a	-
    51  
     52  ifdef(`OPERATION_popcount',`	C bindings used when this file is built as mpn_popcount
     53    define(`func',`mpn_popcount')	C name handed to PROLOGUE
     54    define(`ap',		`r0')	C input: source pointer, advanced by every load
     55    define(`n',		`r1')	C input: limbs left to process
     56    define(`a0',		`r2')	C first limb of the pair fetched by ldrd
     57    define(`a1',		`r3')	C second limb of the pair; sole limb on the odd path
     58    define(`s',		`r5')	C four byte counts awaiting accumulation; same register as the r5 scratch
     59    define(`b_01010101',	`r6')	C holds 0x55555555
     60    define(`b_00110011',	`r7')	C holds 0x33333333
     61    define(`b_00001111',	`r8')	C holds 0x0f0f0f0f
     62    define(`zero',	`r9')	C holds 0, paired with s in usada8
     63    define(`POPC',	`$1')	C POPC emits its argument
     64    define(`HAMD',	`dnl')	C HAMD deletes the rest of its line
     65  ')
     66  ifdef(`OPERATION_hamdist',`	C bindings used when this file is built as mpn_hamdist
     67    define(`func',`mpn_hamdist')	C name handed to PROLOGUE
     68    define(`ap',		`r0')	C input: first source pointer, advanced by every load
     69    define(`bp',		`r1')	C input: second source pointer, advanced in step with ap
     70    define(`n',		`r2')	C input: limbs left to process
     71    define(`a0',		`r6')	C first limb of the ap pair, then that limb xor its bp partner
     72    define(`a1',		`r7')	C second limb likewise; sole limb on the odd path
     73    define(`b0',		`r4')	C first limb of the bp pair; r4 becomes scratch after the eor
     74    define(`b1',		`r5')	C second limb of the bp pair; r5 becomes scratch after the eor
     75    define(`s',		`r11')	C four byte counts awaiting accumulation
     76    define(`b_01010101',	`r8')	C holds 0x55555555
     77    define(`b_00110011',	`r9')	C holds 0x33333333
     78    define(`b_00001111',	`r10')	C holds 0x0f0f0f0f
     79    define(`zero',	`r3')	C holds 0, paired with s in usada8
     80    define(`POPC',	`dnl')	C POPC deletes the rest of its line
     81    define(`HAMD',	`$1')	C HAMD emits its argument
     82  ')
    83  
     84  MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist)
     85  C func returns in r0 the number of 1 bits in the n limbs at ap (popcount) or in those limbs xor the limbs at bp (hamdist).
     86  ASM_START()
     87  PROLOGUE(func)
     88  POPC(`	push	{ r4-r9 }	')	C save r4-r9; popped again before the return
     89  HAMD(`	push	{ r4-r11 }	')	C save r4-r11; popped again before the return
     90  C Load the three bit masks and clear both the zero operand and the running total r12.
     91  	ldr	b_01010101, =0x55555555
     92  	mov	r12, #0	C r12 = running bit total
     93  	ldr	b_00110011, =0x33333333
     94  	mov	zero, #0
     95  	ldr	b_00001111, =0x0f0f0f0f
     96  C An odd n is peeled by a single-limb step that joins the loop at L(mid), leaving an even count.
     97  	tst	n, #1
     98  	beq	L(evn)
     99  C Odd entry: reduce one limb to eight 4-bit counts in r5.
    100  L(odd):	ldr	a1, [ap], #4		C 1 x 32 1-bit accumulators, 0-1
    101  HAMD(`	ldr	b1, [bp], #4	')	C 1 x 32 1-bit accumulators, 0-1
    102  HAMD(`	eor	a1, a1, b1	')	C a1 = bits that differ between the two limbs
    103  	and	r4, b_01010101, a1, lsr #1
    104  	sub	a1, a1, r4	C 16 2-bit accumulators, 0-2
    105  	and	r4, a1, b_00110011
    106  	bic	r5, a1, b_00110011
    107  	add	r5, r4, r5, lsr #2	C 8 4-bit accumulators, 0-4
    108  	subs	n, n, #1	C these flags are what the bne after L(mid) tests
    109  	b	L(mid)
    110  
    111  L(evn):	mov	s, #0	C nothing pending, so the first usada8 adds 0
    112  C Main loop: two limbs per pass; the byte counts of one pass are added to r12 during the next.
    113  L(top):	ldrd	a0, a1, [ap], #8	C 2 x 32 1-bit accumulators, 0-1
    114  HAMD(`	ldrd	b0, b1, [bp], #8')	C r4 and r5 are free for scratch once the eors below are done
    115  HAMD(`	eor	a0, a0, b0	')
    116  HAMD(`	eor	a1, a1, b1	')
    117  	subs	n, n, #2	C sets the flags for the bne at the loop end; nothing in between writes them
    118  	usada8	r12, s, zero, r12	C r12 += the four bytes of s; must precede the r5 writes since s is r5 for popcount
    119  	and	r4, b_01010101, a0, lsr #1
    120  	sub	a0, a0, r4	C 16 2-bit accumulators, 0-2
    121  	and	r4, b_01010101, a1, lsr #1
    122  	sub	a1, a1, r4	C 16 2-bit accumulators, 0-2
    123  	and	r4, a0, b_00110011
    124  	bic	r5, a0, b_00110011
    125  	add	a0, r4, r5, lsr #2	C 8 4-bit accumulators, 0-4
    126  	and	r4, a1, b_00110011
    127  	bic	r5, a1, b_00110011
    128  	add	a1, r4, r5, lsr #2	C 8 4-bit accumulators, 0-4
    129  	add	r5, a0, a1		C 8 4-bit accumulators, 0-8
    130  L(mid):	and	r4, r5, b_00001111	C the odd path joins here with its 4-bit counts in r5
    131  	bic	r5, r5, b_00001111
    132  	add	s, r4, r5, lsr #4	C 4 8-bit accumulators
    133  	bne	L(top)	C repeat while the last subs left n nonzero
    134  C Fold the byte counts of the final pass into the total, restore registers and return.
    135  	usada8	r0, s, zero, r12	C r0 = r12 + the four bytes of s
    136  POPC(`	pop	{ r4-r9 }	')
    137  HAMD(`	pop	{ r4-r11 }	')
    138  	bx	r14	C return to the caller
    139  EPILOGUE()