github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/ia64/logops_n.asm (about)

     1  dnl  IA-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,
     2  dnl  mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.
     3  
     4  dnl  Contributed to the GNU project by Torbjorn Granlund.
     5  
     6  dnl  Copyright 2003-2005 Free Software Foundation, Inc.
     7  
     8  dnl  This file is part of the GNU MP Library.
     9  dnl
    10  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
    11  dnl  it under the terms of either:
    12  dnl
    13  dnl    * the GNU Lesser General Public License as published by the Free
    14  dnl      Software Foundation; either version 3 of the License, or (at your
    15  dnl      option) any later version.
    16  dnl
    17  dnl  or
    18  dnl
    19  dnl    * the GNU General Public License as published by the Free Software
    20  dnl      Foundation; either version 2 of the License, or (at your option) any
    21  dnl      later version.
    22  dnl
    23  dnl  or both in parallel, as here.
    24  dnl
    25  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    26  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    27  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    28  dnl  for more details.
    29  dnl
    30  dnl  You should have received copies of the GNU General Public License and the
    31  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    32  dnl  see https://www.gnu.org/licenses/.
    33  
    34  include(`../config.m4')
    35  
    36  C           cycles/limb
    37  C Itanium:      2
    38  C Itanium 2:    1
    39  
    40  C TODO
    41  C  * Use rp,rpx scheme of aors_n.asm to allow parallel stores (useful in
    42  C    wind-down code).
    43  
    44  C INPUT PARAMETERS
        C   rp      pointer that every st8 in the routine stores through
        C   up, vp  pointers that the ld8 instructions load the two sources through
        C   n       limb count; n mod 4 picks the feed-in path, n / 4 drives ar.lc
    45  define(`rp', `r32')
    46  define(`up', `r33')
    47  define(`vp', `r34')
    48  define(`n', `r35')
    49  
        C OPERATION SELECTION
        C Each OPERATION_xxx symbol selects one function and binds three macros:
        C   func      the name handed to PROLOGUE
        C   logop     the logical instruction applied to one pair of source limbs
        C   notormov  the final step before the store: mov copies the logop result
        C             unchanged, sub from -1 forms its complement (-1 - x = ~x)
        C Value stored for source limbs u (from up) and v (from vp):
        C   and_n   u & v       andn_n  u & ~v      nand_n  ~(u & v)
        C   ior_n   u | v       iorn_n  u | ~v      nior_n  ~(u | v)
        C   xor_n   u ^ v       xnor_n  ~(u ^ v)
        C iorn_n swaps the andcm operands to compute v & ~u; complementing that
        C gives u | ~v.
        C NOTE(review): if no OPERATION_xxx symbol is defined, func, logop and
        C notormov are never defined here -- presumably the build always sets
        C exactly one; confirm against the build scripts.
    50  ifdef(`OPERATION_and_n',
    51  `	define(`func',`mpn_and_n')
    52  	define(`logop',		`and	$1 = $2, $3')
    53  	define(`notormov',	`mov	$1 = $2')')
    54  ifdef(`OPERATION_andn_n',
    55  `	define(`func',`mpn_andn_n')
    56  	define(`logop',		`andcm	$1 = $2, $3')
    57  	define(`notormov',	`mov	$1 = $2')')
    58  ifdef(`OPERATION_nand_n',
    59  `	define(`func',`mpn_nand_n')
    60  	define(`logop',		`and	$1 = $2, $3')
    61  	define(`notormov',	`sub	$1 = -1, $2')')
    62  ifdef(`OPERATION_ior_n',
    63  `	define(`func',`mpn_ior_n')
    64  	define(`logop',		`or	$1 = $2, $3')
    65  	define(`notormov',	`mov	$1 = $2')')
    66  ifdef(`OPERATION_iorn_n',
    67  `	define(`func',`mpn_iorn_n')
    68  	define(`logop',		`andcm	$1 = $3, $2')
    69  	define(`notormov',	`sub	$1 = -1, $2')')
    70  ifdef(`OPERATION_nior_n',
    71  `	define(`func',`mpn_nior_n')
    72  	define(`logop',		`or	$1 = $2, $3')
    73  	define(`notormov',	`sub	$1 = -1, $2')')
    74  ifdef(`OPERATION_xor_n',
    75  `	define(`func',`mpn_xor_n')
    76  	define(`logop',		`xor	$1 = $2, $3')
    77  	define(`notormov',	`mov	$1 = $2')')
    78  ifdef(`OPERATION_xnor_n',
    79  `	define(`func',`mpn_xnor_n')
    80  	define(`logop',		`xor	$1 = $2, $3')
    81  	define(`notormov',	`sub	$1 = -1, $2')')
    82  
        C Names every function this file can be built as.
        C NOTE(review): presumably read by the build to map functions to this
        C file -- confirm where MULFUNC_PROLOGUE is defined.
    83  MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
    84  
    85  ASM_START()
        C func -- the logical operation chosen by the OPERATION_xxx symbol.
        C
        C For each of the n limbs: load 8 bytes through up and through vp, combine
        C them with logop, pass the result through notormov, store 8 bytes through
        C rp.  All three pointers advance by 8 per limb.
        C
        C Layout:
        C   entry     load limb 0, save ar.lc in r2, compute n mod 4 and n / 4,
        C             set p15 when n > 4, dispatch on n mod 4
        C   .Lb00 .Lb01 .Lb10 .Lb11
        C             feed-in code for n mod 4 = 0, 1, 2, 3.  With p15 clear the
        C             whole operand is finished through the wind-down chain;
        C             otherwise more limbs are preloaded, ar.lc is set and the
        C             loop is entered at the matching stage
        C   .Loop     four limbs per iteration; each stage stores one finished
        C             limb, runs logop on the next pair, runs notormov on the
        C             previous logop result and reloads the pair just consumed
        C   .Lcj6 ... .Lcj1
        C             wind-down without loads; entering at .LcjK performs K stores
        C
        C Registers:
        C   r10, r11   limb 0 from up and vp
        C   r16-r19    limbs loaded through up
        C   r20-r23    limbs loaded through vp (r16 pairs with r20, and so on)
        C   r14, r15   alternating logop results (r14 first holds n mod 4)
        C   r8, r9     alternating notormov results, the values that get stored
        C   r2         saved ar.lc, restored before returning
        C
        C NOTE(review): n = 0 is not treated specially; it takes the n mod 4 = 0
        C path, which loads and stores four limbs.  Presumably callers guarantee
        C n >= 1 -- confirm against the mpn interface documentation.
    86  PROLOGUE(func)
    87  	.prologue
    88  	.save	ar.lc, r2
    89  	.body
        C With HAVE_ABI_32 defined, the three pointers are first converted with
        C addp4 and n is zero-extended from 32 bits.
    90  ifdef(`HAVE_ABI_32',
    91  `	addp4	rp = 0, rp			C			M I
    92  	addp4	up = 0, up			C			M I
    93  	addp4	vp = 0, vp			C			M I
    94  	nop.m		0
    95  	nop.m		0
    96  	zxt4	n = n				C			I
    97  	;;
    98  ')
        C Load limb 0 of both sources and save ar.lc in r2.
    99  {.mmi
   100  	ld8		r10 = [up], 8		C			M
   101  	ld8		r11 = [vp], 8		C			M
   102  	mov.i		r2 = ar.lc		C			I0
   103  }
        C r14 = n mod 4, p15 = (4 < n) using the unshifted n, then n = n / 4.
   104  {.mmi
   105  	and		r14 = 3, n		C			M I
   106  	cmp.lt		p15, p14 = 4, n		C			M I
   107  	shr.u		n = n, 2		C			I0
   108  	;;
   109  }
        C Dispatch on n mod 4; the value 0 falls through to .Lb00.
   110  {.mmi
   111  	cmp.eq		p6, p0 = 1, r14		C			M I
   112  	cmp.eq		p7, p0 = 2, r14		C			M I
   113  	cmp.eq		p8, p0 = 3, r14		C			M I
   114  }
   115  {.bbb
   116     (p6)	br.dptk		.Lb01			C			B
   117     (p7)	br.dptk		.Lb10			C			B
   118     (p8)	br.dptk		.Lb11			C			B
   119  }
   120  
        C n mod 4 = 0: load limbs 1-3; n becomes n/4 - 2, the loop count that
        C .grt4 puts in ar.lc.
   121  .Lb00:	ld8		r17 = [up], 8		C			M
   122  	ld8		r21 = [vp], 8		C			M
   123  	add		n = -2, n		C			M I
   124  	;;
   125  	ld8		r18 = [up], 8		C			M
   126  	ld8		r22 = [vp], 8		C			M
   127  	;;
   128  	ld8		r19 = [up], 8		C			M
   129  	ld8		r23 = [vp], 8		C			M
   130    (p15)	br.cond.dpnt	.grt4			C			B
   131  
        C p15 clear: the four loaded limbs are all there is; finish at .Lcj4.
   132  	logop(		r14, r10, r11)		C			M I
   133  	;;
   134  	logop(		r15, r17, r21)		C			M I
   135  	notormov(	r8, r14)		C			M I
   136  	br		.Lcj4			C			B
   137  
        C n >= 8: preload limbs 4 and 5, set ar.lc, enter the loop at .LL00.
   138  .grt4:	logop(		r14, r10, r11)		C			M I
   139  	ld8		r16 = [up], 8		C			M
   140  	ld8		r20 = [vp], 8		C			M
   141  	;;
   142  	logop(		r15, r17, r21)		C			M I
   143  	ld8		r17 = [up], 8		C			M
   144  	mov.i		ar.lc = n		C			I0
   145  	notormov(	r8, r14)		C			M I
   146  	ld8		r21 = [vp], 8		C			M
   147  	br		.LL00			C			B
   148  
        C n mod 4 = 1: n becomes n/4 - 1.
   149  .Lb01:	add		n = -1, n		C			M I
   150  	logop(		r15, r10, r11)		C			M I
   151    (p15)	br.cond.dpnt	.grt1			C			B
   152  	;;
   153  
        C p15 clear (n = 1): a single limb, finish at .Lcj1.
   154  	notormov(	r9, r15)		C			M I
   155  	br		.Lcj1			C			B
   156  
        C n >= 5: preload limbs 1-4 and set ar.lc = n/4 - 1.  br.cloop tests that
        C count: nonzero (n >= 9) continues at .grt5 with the count decremented,
        C zero (n = 5) falls through and finishes at .Lcj5.
   157  .grt1:	ld8		r16 = [up], 8		C			M
   158  	ld8		r20 = [vp], 8		C			M
   159  	;;
   160  	ld8		r17 = [up], 8		C			M
   161  	ld8		r21 = [vp], 8		C			M
   162  	mov.i		ar.lc = n		C			I0
   163  	;;
   164  	ld8		r18 = [up], 8		C			M
   165  	ld8		r22 = [vp], 8		C			M
   166  	;;
   167  	ld8		r19 = [up], 8		C			M
   168  	ld8		r23 = [vp], 8		C			M
   169  	br.cloop.dptk	.grt5			C			B
   170  	;;
   171  
   172  	logop(		r14, r16, r20)		C			M I
   173  	notormov(	r9, r15)		C			M I
   174  	br		.Lcj5			C			B
   175  
        C n >= 9: preload limb 5 and enter the loop at .LL01.
   176  .grt5:	logop(		r14, r16, r20)		C			M I
   177  	ld8		r16 = [up], 8		C			M
   178  	notormov(	r9, r15)		C			M I
   179  	ld8		r20 = [vp], 8		C			M
   180  	br		.LL01			C			B
   181  
        C n mod 4 = 2: load limb 1.
   182  .Lb10:	ld8		r19 = [up], 8		C			M
   183  	ld8		r23 = [vp], 8		C			M
   184    (p15)	br.cond.dpnt	.grt2			C			B
   185  
        C p15 clear (n = 2): finish at .Lcj2.
   186  	logop(		r14, r10, r11)		C			M I
   187  	;;
   188  	logop(		r15, r19, r23)		C			M I
   189  	notormov(	r8, r14)		C			M I
   190  	br		.Lcj2			C			B
   191  
        C n >= 6: preload limbs 2-5 and set ar.lc = n/4 - 1.  br.cloop enters the
        C loop at its top when that count is nonzero (n >= 10); otherwise (n = 6)
        C the six pending limbs are finished at .Lcj6.
   192  .grt2:	ld8		r16 = [up], 8		C			M
   193  	ld8		r20 = [vp], 8		C			M
   194  	add		n = -1, n		C			M I
   195  	;;
   196  	ld8		r17 = [up], 8		C			M
   197  	ld8		r21 = [vp], 8		C			M
   198  	logop(		r14, r10, r11)		C			M I
   199  	;;
   200  	ld8		r18 = [up], 8		C			M
   201  	ld8		r22 = [vp], 8		C			M
   202  	mov.i		ar.lc = n		C			I0
   203  	;;
   204  	logop(		r15, r19, r23)		C			M I
   205  	ld8		r19 = [up], 8		C			M
   206  	notormov(	r8, r14)		C			M I
   207  	ld8		r23 = [vp], 8		C			M
   208  	br.cloop.dptk	.Loop			C			B
   209  	br		.Lcj6			C			B
   210  
        C n mod 4 = 3: load limbs 1 and 2; n becomes n/4 - 1.
   211  .Lb11:	ld8		r18 = [up], 8		C			M
   212  	ld8		r22 = [vp], 8		C			M
   213  	add		n = -1, n		C			M I
   214  	;;
   215  	ld8		r19 = [up], 8		C			M
   216  	ld8		r23 = [vp], 8		C			M
   217  	logop(		r15, r10, r11)		C			M I
   218    (p15)	br.cond.dpnt	.grt3			C			B
   219  	;;
   220  
        C p15 clear (n = 3): finish at .Lcj3.
   221  	logop(		r14, r18, r22)		C			M I
   222  	notormov(	r9, r15)		C			M I
   223  	br		.Lcj3			C			B
   224  
        C n >= 7: preload limbs 3-5, set ar.lc, enter the loop at .LL11.
   225  .grt3:	ld8		r16 = [up], 8		C			M
   226  	ld8		r20 = [vp], 8		C			M
   227  	;;
   228  	ld8		r17 = [up], 8		C			M
   229  	ld8		r21 = [vp], 8		C			M
   230  	mov.i		ar.lc = n		C			I0
   231  	;;
   232  	logop(		r14, r18, r22)		C			M I
   233  	ld8		r18 = [up], 8		C			M
   234  	notormov(	r9, r15)		C			M I
   235  	ld8		r22 = [vp], 8		C			M
   236  	br		.LL11			C			B
   237  
        C Four stages, one limb each.  Within a stage the logop reads the register
        C pair before the ld8 in the same instruction group refills it, and the
        C notormov consumes the logop result of the previous stage.  br.cloop
        C repeats while ar.lc is nonzero.
   238  C *** MAIN LOOP START ***
   239  	ALIGN(32)
   240  .Loop:	st8		[rp] = r8, 8		C			M
   241  	logop(		r14, r16, r20)		C			M I
   242  	notormov(	r9, r15)		C			M I
   243  	ld8		r16 = [up], 8		C			M
   244  	ld8		r20 = [vp], 8		C			M
   245  	nop.b		0
   246  	;;
   247  .LL01:	st8		[rp] = r9, 8		C			M
   248  	logop(		r15, r17, r21)		C			M I
   249  	notormov(	r8, r14)		C			M I
   250  	ld8		r17 = [up], 8		C			M
   251  	ld8		r21 = [vp], 8		C			M
   252  	nop.b		0
   253  	;;
   254  .LL00:	st8		[rp] = r8, 8		C			M
   255  	logop(		r14, r18, r22)		C			M I
   256  	notormov(	r9, r15)		C			M I
   257  	ld8		r18 = [up], 8		C			M
   258  	ld8		r22 = [vp], 8		C			M
   259  	nop.b		0
   260  	;;
   261  .LL11:	st8		[rp] = r9, 8		C			M
   262  	logop(		r15, r19, r23)		C			M I
   263  	notormov(	r8, r14)		C			M I
   264  	ld8		r19 = [up], 8		C			M
   265  	ld8		r23 = [vp], 8		C			M
   266  	br.cloop.dptk	.Loop	;;		C			B
   267  C *** MAIN LOOP END ***
   268  
        C Wind-down: same stages as the loop but without loads.  Entering at .LcjK
        C performs the last K stores.
   269  .Lcj6:	st8		[rp] = r8, 8		C			M
   270  	logop(		r14, r16, r20)		C			M I
   271  	notormov(	r9, r15)		C			M I
   272  	;;
   273  .Lcj5:	st8		[rp] = r9, 8		C			M
   274  	logop(		r15, r17, r21)		C			M I
   275  	notormov(	r8, r14)		C			M I
   276  	;;
   277  .Lcj4:	st8		[rp] = r8, 8		C			M
   278  	logop(		r14, r18, r22)		C			M I
   279  	notormov(	r9, r15)		C			M I
   280  	;;
   281  .Lcj3:	st8		[rp] = r9, 8		C			M
   282  	logop(		r15, r19, r23)		C			M I
   283  	notormov(	r8, r14)		C			M I
   284  	;;
   285  .Lcj2:	st8		[rp] = r8, 8		C			M
   286  	notormov(	r9, r15)		C			M I
   287  	;;
        C Final store, then put the saved value back in ar.lc and return.
   288  .Lcj1:	st8		[rp] = r9, 8		C			M
   289  	mov.i		ar.lc = r2		C			I0
   290  	br.ret.sptk.many b0			C			B
   291  EPILOGUE()
   292  ASM_END()