github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/sparc64/sec_tabselect.asm (about)

     1  dnl  SPARC v9 mpn_sec_tabselect.
     2  
     3  dnl  Contributed to the GNU project by Torbjörn Granlund and David Miller.
     4  
     5  dnl  Copyright 2013 Free Software Foundation, Inc.
     6  
     7  dnl  This file is part of the GNU MP Library.
     8  dnl
     9  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
    10  dnl  it under the terms of either:
    11  dnl
    12  dnl    * the GNU Lesser General Public License as published by the Free
    13  dnl      Software Foundation; either version 3 of the License, or (at your
    14  dnl      option) any later version.
    15  dnl
    16  dnl  or
    17  dnl
    18  dnl    * the GNU General Public License as published by the Free Software
    19  dnl      Foundation; either version 2 of the License, or (at your option) any
    20  dnl      later version.
    21  dnl
    22  dnl  or both in parallel, as here.
    23  dnl
    24  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    25  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    26  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    27  dnl  for more details.
    28  dnl
    29  dnl  You should have received copies of the GNU General Public License and the
    30  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    31  dnl  see https://www.gnu.org/licenses/.
    32  
    33  include(`../config.m4')
    34  
    35  C		   cycles/limb
    36  C UltraSPARC 1&2:	 2 hopefully
    37  C UltraSPARC 3:		 3
    38  C UltraSPARC T1:	17
    39  C UltraSPARC T3:	 ?
    40  C UltraSPARC T4/T5:	 2.25 hopefully
    41  
    42  C INPUT PARAMETERS
    43  define(`rp',     `%i0')	C destination pointer; the selected limbs are stored through it
    44  define(`tp',     `%i1')	C table pointer; later reused as the read cursor that walks the entries
    45  define(`n',      `%i2')	C limbs per table entry (stride is computed as n * 8 bytes)
    46  define(`nents',  `%i3')	C number of table entries scanned in every pass
    47  define(`which',  `%i4')	C index, counted from 0, of the entry to keep
    48  
    49  define(`i',      `%g1')	C entries still to scan in the current pass
    50  define(`j',      `%g3')	C limbs still to do minus 4; controls the 4-limb outer loop
    51  define(`stride', `%g4')	C byte distance from one table entry to the next
    52  define(`tporig', `%g5')	C address in entry 0 of the limbs handled by the current pass
    53  define(`mask',   `%o0')	C all ones while scanning the wanted entry, zero otherwise
    54  
    55  define(`data0',  `%l0')	C data0-data3: accumulators, masked limbs are ORed in
    56  define(`data1',  `%l1')
    57  define(`data2',  `%l2')
    58  define(`data3',  `%l3')
    59  define(`t0',     `%l4')	C t0-t3: limbs just loaded from the current entry
    60  define(`t1',     `%l5')
    61  define(`t2',     `%l6')
    62  define(`t3',     `%l7')
    63  
    64  ASM_START()
    65  	REGISTER(%g2,#scratch)
    66  	REGISTER(%g3,#scratch)
        C mpn_sec_tabselect (rp, tp, n, nents, which)
        C
        C Copies the n limbs of table entry number which (counted from 0) to rp.
        C The table at tp is read as nents entries of n limbs (8 bytes each), one
        C entry every 8 * n bytes.  Every entry is loaded; each limb is ANDed with
        C a mask that is all ones only for the wanted entry and ORed into an
        C accumulator, so the loads and branches executed do not depend on which.
        C
        C The n limbs are handled in blocks: first as many 4-limb blocks as fit,
        C then one 2-limb block if bit 1 of n is set, then one 1-limb block if bit
        C 0 of n is set.  Each block makes one full pass over all nents entries.
        C
        C The inner loops test their counter only after decrementing it, so nents
        C is assumed to be at least 1 -- TODO confirm against the callers.
        C
        C An instruction indented by one extra space sits in the delay slot of the
        C branch above it and executes whether or not that branch is taken.
    67  PROLOGUE(mpn_sec_tabselect)
    68  	save	%sp, -176, %sp		C new register window and a 176-byte stack frame
    69  
    70  	sllx	n, 3, stride		C stride = 8 * n, bytes from one entry to the next
    71  	sub	n, 4, j			C j = n - 4
    72  	brlz	j, L(outer_end)		C n < 4: there is no 4-limb block to do
    73  	 mov	tp, tporig		C (delay slot) remember the table base
    74  
    75  L(outer_loop):				C one 4-limb block per iteration
    76  	clr	data0			C zero the four accumulators
    77  	clr	data1
    78  	clr	data2
    79  	clr	data3
    80  	mov	tporig, tp		C restart the read cursor at entry 0
    81  	mov	nents, i		C i = entries left to scan
    82  	mov	which, %o1		C %o1 counts down to locate the wanted entry
    83  
    84  L(top):	subcc	%o1, 1, %o1		C set carry iff o1 = 0
    85  	ldx	[tp + 0], t0
    86  	subc	%g0, %g0, mask		C mask = 0 - 0 - carry: all ones or zero
    87  	ldx	[tp + 8], t1
    88  	sub	i, 1, i			C one entry fewer to scan
    89  	ldx	[tp + 16], t2
    90  	ldx	[tp + 24], t3
    91  	add	tp, stride, tp		C step to the same limbs of the next entry
    92  	and	t0, mask, t0		C limbs survive only when mask is all ones
    93  	and	t1, mask, t1
    94  	or	t0, data0, data0	C merge into the accumulators
    95  	and	t2, mask, t2
    96  	or	t1, data1, data1
    97  	and	t3, mask, t3
    98  	or	t2, data2, data2
    99  	brnz	i, L(top)		C loop until all nents entries are scanned
   100  	 or	t3, data3, data3	C (delay slot)
   101  
   102  	stx	data0, [rp + 0]		C write the four selected limbs
   103  	subcc	j, 4, j			C four fewer limbs left for the outer loop
   104  	stx	data1, [rp + 8]
   105  	stx	data2, [rp + 16]
   106  	stx	data3, [rp + 24]
   107  	add	tporig, (4 * 8), tporig	C next block starts four limbs further on
   108  
   109  	brgez	j, L(outer_loop)	C repeat while at least four limbs remain
   110  	 add	rp, (4 * 8), rp		C (delay slot) advance the destination
   111  L(outer_end):
   112  
   113  
   114  	andcc	n, 2, %g0		C test bit 1 of n
   115  	be	L(b0x)			C clear: skip the 2-limb block
   116  	 nop
   117  L(b1x):	clr	data0			C 2-limb block, same scheme as the 4-limb loop
   118  	clr	data1
   119  	mov	tporig, tp
   120  	mov	nents, i
   121  	mov	which, %o1
   122  
   123  L(tp2):	subcc	%o1, 1, %o1		C carry only when the counter was 0
   124  	ldx	[tp + 0], t0
   125  	subc	%g0, %g0, mask		C mask = all ones or zero
   126  	ldx	[tp + 8], t1
   127  	sub	i, 1, i
   128  	add	tp, stride, tp		C next entry
   129  	and	t0, mask, t0
   130  	and	t1, mask, t1
   131  	or	t0, data0, data0
   132  	brnz	i, L(tp2)		C loop over all entries
   133  	 or	t1, data1, data1	C (delay slot)
   134  
   135  	stx	data0, [rp + 0]
   136  	stx	data1, [rp + 8]
   137  	add	tporig, (2 * 8), tporig	C move past the two limbs just handled
   138  	add	rp, (2 * 8), rp
   139  
   140  
   141  L(b0x):	andcc	n, 1, %g0		C test bit 0 of n
   142  	be	L(b00)			C clear: nothing left to do
   143  	 nop
   144  L(b01):	clr	data0			C final single limb, same scheme again
   145  	mov	tporig, tp
   146  	mov	nents, i
   147  	mov	which, %o1
   148  
   149  L(tp1):	subcc	%o1, 1, %o1		C carry only when the counter was 0
   150  	ldx	[tp + 0], t0
   151  	subc	%g0, %g0, mask		C mask = all ones or zero
   152  	sub	i, 1, i
   153  	add	tp, stride, tp		C next entry
   154  	and	t0, mask, t0
   155  	brnz	i, L(tp1)		C loop over all entries
   156  	 or	t0, data0, data0	C (delay slot)
   157  
   158  	stx	data0, [rp + 0]
   159  
   160  L(b00):	 ret				C return to the caller
   161  	  restore			C (delay slot) pop the register window
   162  EPILOGUE()