github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/ia64/popcount.asm (about)

     1  dnl  IA-64 mpn_popcount -- mpn population count.
     2  
     3  dnl  Contributed to the GNU project by Torbjorn Granlund.
     4  
     5  dnl  Copyright 2000-2005 Free Software Foundation, Inc.
     6  
     7  dnl  This file is part of the GNU MP Library.
     8  dnl
     9  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
    10  dnl  it under the terms of either:
    11  dnl
    12  dnl    * the GNU Lesser General Public License as published by the Free
    13  dnl      Software Foundation; either version 3 of the License, or (at your
    14  dnl      option) any later version.
    15  dnl
    16  dnl  or
    17  dnl
    18  dnl    * the GNU General Public License as published by the Free Software
    19  dnl      Foundation; either version 2 of the License, or (at your option) any
    20  dnl      later version.
    21  dnl
    22  dnl  or both in parallel, as here.
    23  dnl
    24  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    25  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    26  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    27  dnl  for more details.
    28  dnl
    29  dnl  You should have received copies of the GNU General Public License and the
    30  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    31  dnl  see https://www.gnu.org/licenses/.
    32  
    33  include(`../config.m4')
    34  
    35  C           cycles/limb
    36  C Itanium:       1.5
    37  C Itanium 2:     1
    38  
    39  C INPUT PARAMETERS
    40  define(`up', `r32')		C limb pointer, post-incremented by every ld8
    41  define(`n', `r33')		C limb count, later turned into the ar.lc trip count
    42  
        C u0-u3 receive the loaded limbs, c0-c3 receive the popcnt results.
    43  define(`u0',`r16') define(`u1',`r17') define(`u2',`r18') define(`u3',`r19')
    44  define(`c0',`r28') define(`c1',`r29') define(`c2',`r30') define(`c3',`r31')
    45  define(`s',`r8')		C running sum of the popcnt results
    46  
    47  
    48  ASM_START()
        C -----------------------------------------------------------------------
        C mpn_popcount -- add up the population counts of the n limbs at up.
        C
        C In:   up (r32) = pointer to the limbs, n (r33) = number of limbs.
        C       The first limb is loaded unconditionally, so n >= 1 is presumably
        C       required -- confirm against the callers.
        C Out:  s (r8) = sum of popcnt over all n limbs.
        C
        C Other registers:
        C   r10      first limb, loaded before the dispatch
        C   r9       prefetch pointer, starts 512 bytes past up, +32 per iteration
        C   r14      n mod 4, selects one of four entry sequences
        C   r2       copy of ar.lc, written back before the final return
        C   p15/p14  p15 = n > 4, p14 = n <= 4
        C
        C The loop is unrolled four ways.  Each entry sequence (.Lb00, .Lb01,
        C .Lb10, .Lb11) primes the pipeline for its value of n mod 4 and then
        C either joins the loop at the matching label or, for n <= 4, goes
        C straight to the summing tail.  ar.lc is loaded with (n-5) >> 2.
        C -----------------------------------------------------------------------
    49  PROLOGUE(mpn_popcount)
    50  	.prologue
        	.save		ar.lc, r2		C unwind info: ar.lc is kept in r2
        	.body					C end of the unwind prologue region
        C 32-bit ABI only: widen the pointer and the count to 64 bits.
    51  ifdef(`HAVE_ABI_32',
    52  `	addp4		up = 0, up		C			M I
    53  	nop.m		0
    54  	zxt4		n = n			C			I
    55  	;;
    56  ')
    57  
        C Common setup: start the first load, save ar.lc, classify n.
    58   {.mmi;	add		r9 = 512, up		C prefetch pointer	M I
    59  	ld8		r10 = [up], 8		C load first limb	M01
    60  	mov.i		r2 = ar.lc		C save ar.lc		I0
    61  }{.mmi;	and		r14 = 3, n		C r14 = n mod 4		M I
    62  	cmp.lt		p15, p14 = 4, n		C p15 = n > 4		M I
    63  	add		n = -5, n		C n - 5, shifted below	M I
    64  	;;
    65  }{.mmi;	cmp.eq		p6, p0 = 1, r14		C n mod 4 = 1		M I
    66  	cmp.eq		p7, p0 = 2, r14		C n mod 4 = 2		M I
    67  	cmp.eq		p8, p0 = 3, r14		C n mod 4 = 3		M I
    68  }{.bbb
    69    (p6)	br.dptk		.Lb01			C			B
    70    (p7)	br.dptk		.Lb10			C			B
    71    (p8)	br.dptk		.Lb11			C			B
    72  }
    73  
    74  
        C n mod 4 = 0.  Four limbs (r10, u1, u2, u3) are loaded and counted into
        C c0-c3.  With n = 4 (p15 clear) they are summed through .Lcj4 and the
        C value put in ar.lc is never used as a trip count.  Otherwise a fifth
        C load is issued and the loop is joined at .LL00 with s = 0.
    75  .Lb00:	ld8		u1 = [up], 8		C			M01
    76  	shr.u		n = n, 2		C trip count		I0
    77  	mov		s = 0			C			M I
    78  	;;
    79  	ld8		u2 = [up], 8		C			M01
    80  	popcnt		c0 = r10		C			I0
    81  	mov.i		ar.lc = n		C			I0
    82  	;;
    83  	ld8		u3 = [up], 8		C			M01
    84  	popcnt		c1 = u1			C			I0
    85    (p15)	br.cond.dptk	.grt4			C			B
    86  	;;
    87  	nop.m	0				C			-
    88  	nop.m	0				C			-
    89  	popcnt		c2 = u2			C			I0
    90  	;;
    91  	mov		s = c0			C			M I
    92  	popcnt		c3 = u3			C			I0
    93  	br		.Lcj4			C			B
    94  
    95  .grt4:	ld8		u0 = [up], 8		C			M01
    96  	popcnt		c2 = u2			C			I0
    97  	br		.LL00			C			B
    98  
    99  
        C n mod 4 = 1.  s takes the count of the first limb.  For n = 1 (p14 set)
        C the function returns at once; ar.lc has not been written on this path,
        C so it needs no restore.  Otherwise four more limbs are loaded, c3 is
        C cleared so the first add of c3 contributes nothing, and the loop is
        C entered at its top when the trip count is nonzero, else .Lend runs.
   100  .Lb01:
   101  	popcnt		s = r10			C			I0
   102    (p14)	br.ret.sptk.many b0			C			B
   103  
   104  .grt1:	ld8		u0 = [up], 8		C			M01
   105  	shr.u		n = n, 2		C trip count		I0
   106  	;;
   107  	ld8		u1 = [up], 8		C			M01
   108  	mov.i		ar.lc = n		C			I0
   109  	;;
   110  	ld8		u2 = [up], 8		C			M01
   111  	popcnt		c0 = u0			C			I0
   112  	mov		c3 = 0			C			I0
   113  
   114  	;;
   115  	ld8		u3 = [up], 8		C			M01
   116  	popcnt		c1 = u1			C			I0
   117  	br.cloop.dptk	.Loop			C			B
   118  	br		.Lend			C			B
   119  
   120  
        C n mod 4 = 2.  For n = 2 the counts of r10 and u3 are summed through
        C .Lcj2.  Otherwise the loop is joined at .LL10 with c2 and c3 holding
        C the counts of the first two limbs and s = 0.
   121  .Lb10:	ld8		u3 = [up], 8		C			M01
   122  	shr.u		n = n, 2		C trip count		I0
   123    (p15)	br.cond.dptk	.grt2			C			B
   124  
   125  	popcnt		s = r10			C			I0
   126  	;;
   127  	popcnt		c3 = u3			C			I0
   128  	br		.Lcj2			C			B
   129  
   130  .grt2:	ld8		u0 = [up], 8		C			M01
   131  	mov.i		ar.lc = n		C			I0
   132  	popcnt		c2 = r10		C			I0
   133  	;;
   134  	ld8		u1 = [up], 8		C			M01
   135  	popcnt		c3 = u3			C			I0
   136  	mov		s = 0			C			M I
   137  	;;
   138  	ld8		u2 = [up], 8		C			M01
   139  	popcnt		c0 = u0			C			I0
   140  	br		.LL10			C			B
   141  
   142  
        C n mod 4 = 3.  s takes the count of the first limb, so no separate
        C clearing of s is needed here.  For n = 3 the counts of u2 and u3 are
        C added through .Lcj3.  Otherwise the loop is joined at .LL11 with c1
        C cleared so the first add of c1 contributes nothing.
   143  .Lb11:	ld8		u2 = [up], 8		C			M01
   144  	shr.u		n = n, 2		C trip count		I0
   146  	;;
   147  	ld8		u3 = [up], 8		C			M01
   148  	popcnt		s = r10			C			I0
   149    (p15)	br.cond.dptk	.grt3			C			B
   150  
   151  	popcnt		c2 = u2			C			I0
   152  	;;
   153  	popcnt		c3 = u3			C			I0
   154  	br		.Lcj3			C			B
   155  
   156  .grt3:	ld8		u0 = [up], 8		C			M01
   157  	popcnt		c2 = u2			C			I0
   158  	mov.i		ar.lc = n		C			I0
   159  	mov		c1 = 0			C			M I
   160  	;;
   161  	ld8		u1 = [up], 8		C			M01
   162  	popcnt		c3 = u3			C			I0
   163  	br		.LL11			C			B
   164  
   165  
        C Main loop, four limbs per trip.  Every stage loads one limb, counts the
        C limb loaded two stages earlier and adds the count produced three stages
        C earlier.  One lfetch per trip advances the prefetch pointer by 32 bytes.
   166  .Loop:	ld8		u0 = [up], 8		C			M01
   167  	popcnt		c2 = u2			C			I0
   168  	add		s = s, c3		C			M I
   169  	;;
   170  .LL00:	ld8		u1 = [up], 8		C			M01
   171  	popcnt		c3 = u3			C			I0
   172  	add		s = s, c0		C			M I
   173  	;;
   174  .LL11:	ld8		u2 = [up], 8		C			M01
   175  	popcnt		c0 = u0			C			I0
   176  	add		s = s, c1		C			M I
   177  	;;
   178  .LL10:	ld8		u3 = [up], 8		C			M01
   179  	popcnt		c1 = u1			C			I0
   180  	add		s = s, c2		C			M I
   181  	lfetch		[r9], 32		C			M01
   182  	nop.m		0			C			-
   183  	br.cloop.dptk	.Loop			C			B
   184  	;;
   185  
        C Drain: count the limbs still held in u2 and u3 and add every count
        C that is still outstanding.  .Lcj4, .Lcj3 and .Lcj2 double as the entry
        C points for n = 4, 3 and 2.  ar.lc is restored from r2 before returning.
   186  .Lend:	popcnt		c2 = u2			C			I0
   187  	add		s = s, c3		C			M I
   188  	;;
   189  	popcnt		c3 = u3			C			I0
   190  	add		s = s, c0		C			M I
   191  	;;
   192  .Lcj4:	add		s = s, c1		C			M I
   193  	;;
   194  .Lcj3:	add		s = s, c2		C			M I
   195  	;;
   196  .Lcj2:	add		s = s, c3		C			M I
   197  	mov.i		ar.lc = r2		C restore ar.lc		I0
   198  	br.ret.sptk.many b0			C			B
   199  EPILOGUE()
   200  ASM_END()