github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/arm64/sec_tabselect.asm (about)

     1  dnl  ARM64 Neon mpn_sec_tabselect.
     2  
     3  dnl  Contributed to the GNU project by Torbjörn Granlund.
     4  
     5  dnl  Copyright 2011-2014 Free Software Foundation, Inc.
     6  
     7  dnl  This file is part of the GNU MP Library.
     8  dnl
     9  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
    10  dnl  it under the terms of either:
    11  dnl
    12  dnl    * the GNU Lesser General Public License as published by the Free
    13  dnl      Software Foundation; either version 3 of the License, or (at your
    14  dnl      option) any later version.
    15  dnl
    16  dnl  or
    17  dnl
    18  dnl    * the GNU General Public License as published by the Free Software
    19  dnl      Foundation; either version 2 of the License, or (at your option) any
    20  dnl      later version.
    21  dnl
    22  dnl  or both in parallel, as here.
    23  dnl
    24  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    25  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    26  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    27  dnl  for more details.
    28  dnl
    29  dnl  You should have received copies of the GNU General Public License and the
    30  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    31  dnl  see https://www.gnu.org/licenses/.
    32  
    33  include(`../config.m4')
    34  
    35  
    36  C	     cycles/limb
    37  C Cortex-A53	 ?
    38  C Cortex-A57	 ?
    39  
    40  C void
    41  C mpn_sec_tabselect (mp_ptr rp, mp_srcptr *tab,
    42  C		     mp_size_t n, mp_size_t nents, mp_size_t which)
    43  
    44  changecom(@&*$)			C set the m4 comment delimiter to an unlikely string
    45  C (presumably so that text following a `#' immediate, e.g. the label in tbz n, #1, L(b0x), is still macro-expanded)
    46  define(`rp',     `x0')			C arg 1: destination limb pointer, post-incremented by each store
    47  define(`tp',     `x1')			C arg 2: table pointer, advanced while the entries are scanned
    48  define(`n',      `x2')			C arg 3: limbs per table entry (also the entry-to-entry stride)
    49  define(`nents',  `x3')			C arg 4: number of table entries scanned per slice
    50  define(`which',  `x4')			C arg 5: index of the entry to copy out (the code below names x4 directly)
    51  
    52  define(`i',      `x5')			C inner counter: entries left to scan in the current slice
    53  define(`j',      `x6')			C outer counter: limbs left for the 4-limb loop, biased by -4
    54  
    55  define(`maskq',  `v4')			C per-lane mask: all ones in lanes where the entry counter equals `which'
    56  
    57  ASM_START()
    58  PROLOGUE(mpn_sec_tabselect)
    59  	dup	v7.2d, x4			C 2 `which' copies (x4 holds the `which' argument)
    60  C Every entry is loaded and merged under a mask; no branch or load address below depends on `which'.
    61  	mov	x10, #1
    62  	dup	v6.2d, x10			C 2 copies of 1, the per-entry counter increment
    63  
    64  	subs	j, n, #4			C j = n - 4
    65  	b.mi	L(outer_end)			C fewer than 4 limbs: skip the 4-limb loop
    66  C Main loop: each pass produces one 4-limb (32-byte) slice of the result.
    67  L(outer_top):
    68  	mov	i, nents			C scan all nents entries for this slice
    69  	mov	x12, tp				C preserve tp
    70  	movi	v5.16b, #0			C zero 2 counter copies
    71  	movi	v2.16b, #0			C zero the two result accumulators
    72  	movi	v3.16b, #0
    73  	ALIGN(16)
    74  L(tp4):	cmeq	maskq.2d, v5.2d, v7.2d		C compare idx copies to `which' copies
    75  	ld1	{v0.2d,v1.2d}, [tp]		C load 4 limbs of the current entry
    76  	add	v5.2d, v5.2d, v6.2d		C bump both entry counters
    77  	bit	v2.16b, v0.16b, maskq.16b	C insert the loaded bits only where the mask is set
    78  	bit	v3.16b, v1.16b, maskq.16b
    79  	add	tp, tp, n, lsl #3		C advance by n*8 bytes: same slice of the next entry
    80  	sub	i, i, #1
    81  	cbnz	i, L(tp4)			C NOTE(review): tested after the decrement, so nents = 0 looks unsupported -- confirm with callers
    82  	st1	{v2.2d,v3.2d}, [rp], #32	C store the selected 4 limbs, rp += 32
    83  	add	tp, x12, #32			C restore tp, point to next slice
    84  	subs	j, j, #4
    85  	b.pl	L(outer_top)			C repeat while at least 4 limbs remain
    86  L(outer_end):
    87  C Tail: bits 1 and 0 of n select an optional 2-limb and an optional 1-limb final slice.
    88  	tbz	n, #1, L(b0x)			C bit 1 of n clear: no 2-limb slice
    89  	mov	i, nents			C scan all nents entries again
    90  	mov	x12, tp				C preserve tp
    91  	movi	v5.16b, #0			C zero 2 counter copies
    92  	movi	v2.16b, #0			C zero the result accumulator
    93  	ALIGN(16)
    94  L(tp2):	cmeq	maskq.2d, v5.2d, v7.2d		C mask is all ones when the counter equals `which'
    95  	ld1	{v0.2d}, [tp]			C load 2 limbs of the current entry
    96  	add	v5.2d, v5.2d, v6.2d		C bump both entry counters
    97  	bit	v2.16b, v0.16b, maskq.16b	C insert the loaded bits only where the mask is set
    98  	add	tp, tp, n, lsl #3		C advance by n*8 bytes to the next entry
    99  	sub	i, i, #1
   100  	cbnz	i, L(tp2)
   101  	st1	{v2.2d}, [rp], #16		C store the selected 2 limbs, rp += 16
   102  	add	tp, x12, #16			C restore tp, point past the 2 limbs just handled
   103  
   104  L(b0x):	tbz	n, #0, L(b00)			C bit 0 of n clear: no 1-limb slice
   105  	mov	i, nents			C scan all nents entries again
   106  	mov	x12, tp				C preserve tp
   107  	movi	v5.16b, #0			C zero 2 counter copies
   108  	movi	v2.16b, #0			C zero the result accumulator
   109  	ALIGN(16)
   110  L(tp1):	cmeq	maskq.2d, v5.2d, v7.2d		C both lanes compared; only the low 64 bits are consumed below
   111  	ld1	{v0.1d}, [tp]			C load 1 limb of the current entry
   112  	add	v5.2d, v5.2d, v6.2d		C FIXME size should be `1d'
   113  	bit	v2.8b, v0.8b, maskq.8b		C insert the loaded bits only where the mask is set
   114  	add	tp, tp, n, lsl #3		C advance by n*8 bytes to the next entry
   115  	sub	i, i, #1
   116  	cbnz	i, L(tp1)
   117  	st1	{v2.1d}, [rp], #8		C store the selected limb, rp += 8
   118  	add	tp, x12, #8			C NOTE(review): tp is not read again before ret
   119  
   120  L(b00):	ret
   121  EPILOGUE()