github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/ia64/submul_1.asm (about)

     1  dnl  IA-64 mpn_submul_1 -- Multiply a limb vector with a limb and subtract the
     2  dnl  result from a second limb vector.
     3  
     4  dnl  Contributed to the GNU project by Torbjorn Granlund.
     5  
     6  dnl  Copyright 2000-2004 Free Software Foundation, Inc.
     7  
     8  dnl  This file is part of the GNU MP Library.
     9  dnl
    10  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
    11  dnl  it under the terms of either:
    12  dnl
    13  dnl    * the GNU Lesser General Public License as published by the Free
    14  dnl      Software Foundation; either version 3 of the License, or (at your
    15  dnl      option) any later version.
    16  dnl
    17  dnl  or
    18  dnl
    19  dnl    * the GNU General Public License as published by the Free Software
    20  dnl      Foundation; either version 2 of the License, or (at your option) any
    21  dnl      later version.
    22  dnl
    23  dnl  or both in parallel, as here.
    24  dnl
    25  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    26  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    27  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    28  dnl  for more details.
    29  dnl
    30  dnl  You should have received copies of the GNU General Public License and the
    31  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    32  dnl  see https://www.gnu.org/licenses/.
    33  
    34  include(`../config.m4')
    35  
    36  C         cycles/limb
    37  C Itanium:    4.0
    38  C Itanium 2:  2.25 (alignment dependent, sometimes it seems to need 3 c/l)
    39  
    40  C TODO
    41  C  * Optimize feed-in and wind-down code, both for speed and code size.
    42  C  * Handle low limb input and results specially, using a common stf8 in the
    43  C    epilogue.
    44  C  * Delay r8, r10 initialization, put cmp-p6 in 1st bundle and br .Ldone in
    45  C    2nd bundle.  This will allow the bbb bundle to be one cycle earlier and
    46  C    save a cycle.
    47  
    48  C INPUT PARAMETERS
        C   rp  limb vector that is loaded, reduced and stored back in place
        C   up  limb vector that is multiplied by vl
        C   n   number of limbs processed from rp and up
        C   vl  single limb multiplier (negated on entry by the code below)
    49  define(`rp', `r32')
    50  define(`up', `r33')
    51  define(`n',  `r34')
    52  define(`vl', `r35')
    53  
    54  ASM_START()
        C mpn_submul_1 (rp, up, n, vl)
        C
        C Subtracts the product of the n-limb vector at up and the limb vl from
        C the n-limb vector at rp, storing the result limbs back over the rp
        C vector and leaving the final borrow limb in r8 at the br.ret.
        C
        C Method: vl is negated and kept in f6, so each xma.l/xma.hu pair forms
        C the low and high halves of up[i] * (-vl) + rp[i].  The running borrow
        C limb r8 is subtracted from the low half and the difference is stored.
        C The next borrow limb is up[i] minus the high half, plus 1 when the
        C low half was smaller than the old borrow (cmp.ltu into p6).
        C
        C Register use:
        C   rp, up    post-incremented pointers for the ldf8 loads feeding xma
        C   r9        second copy of up, for the integer ld8 loads of up[i]
        C   r10       second copy of rp, for the st8 stores of the result limbs
        C   r8        running borrow limb (cylimb), zero on entry
        C   f6        negated vl
        C   f7, f8    first limb of up and of rp
        C   f32-f35   up limbs, f44-f47 rp limbs
        C   f36-f39   low product halves, moved to r24-r27
        C   f40-f43   high product halves, moved to r28-r31
        C   r20-r23   up limbs as integers (hi sub operands)
        C   r14       n mod 4 during dispatch, afterwards the limb to store
        C   r2        saved ar.lc, restored before each br.ret
        C   ar.lc     (n-1) >> 2, counted down by the br.cloop instructions
    55  PROLOGUE(mpn_submul_1)
    56  	.prologue
    57  	.save	ar.lc, r2
    58  	.body
    59  
    60  ifdef(`HAVE_ABI_32',
    61  `	addp4		rp = 0, rp		C M I
    62  	addp4		up = 0, up		C M I
    63  	zxt4		n = n			C I
    64  	;;
    65  ')
        C Copy the pointers, negate vl, start loading the first limb pair and
        C compute the loop count and n mod 4.
    66  {.mmi
    67  	mov		r10 = rp		C M I
    68  	mov		r9 = up			C M I
    69  	sub		vl = r0, vl		C M I	negate vl
    70  }
    71  {.mmi
    72  	ldf8		f8 = [rp], 8		C M
    73  	ldf8		f7 = [up], 8		C M
    74  	add		r19 = -1, n		C M I	n - 1
    75  	;;
    76  }
    77  {.mmi
    78  	cmp.eq		p6, p0 = 0, vl		C M I
    79  	mov		r8 = 0			C M I	zero cylimb
    80  	mov		r2 = ar.lc		C I0
    81  }
    82  {.mmi
    83  	setf.sig	f6 = vl			C M2 M3
    84  	and		r14 = 3, n		C M I
    85  	shr.u		r19 = r19, 2		C I0
    86  	;;
    87  }
        C Dispatch on n mod 4 (r14).  A zero vl leaves at once through .Ldone
        C with r8 = 0.  Residue 1 sets none of p10, p11, p12 and falls through
        C the bbb bundle into .Lb01.
    88  {.mmb
    89  	nop		0
    90  	cmp.eq		p10, p0 = 0, r14	C M I
    91     (p6)	br.spnt		.Ldone			C B	vl == 0
    92  }
    93  {.mmi
    94  	cmp.eq		p11, p0 = 2, r14	C M I
    95  	cmp.eq		p12, p0 = 3, r14	C M I
    96  	mov		ar.lc = r19		C I0
    97  }
    98  {.bbb
    99    (p10)	br.dptk		.Lb00			C B
   100    (p11)	br.dptk		.Lb10			C B
   101    (p12)	br.dptk		.Lb11			C B
   102  	;;
   103  }
   104  
        C Feed-in for n mod 4 = 1.  The straight-line code after each br.cloop
        C is the case where ar.lc has run out: n = 1 joins the wind-down at
        C .Lcj1 and n = 5 at .Lcj5.  .grt5 handles larger n and enters .Loop at
        C its top, or goes straight to .Lend when ar.lc is already 0.
   105  .Lb01:	br.cloop.dptk	.grt1
   106  
   107  	xma.l		f39 = f7, f6, f8
   108  	xma.hu		f43 = f7, f6, f8
   109  	;;
   110  	getf.sig	r27 = f39			C lo
   111  	getf.sig	r31 = f43			C hi
   112  	ld8		r20 = [r9], 8
   113  	br		.Lcj1
   114  
   115  .grt1:	ldf8		f44 = [rp], 8
   116  	ldf8		f32 = [up], 8
   117  	;;
   118  	ldf8		f45 = [rp], 8
   119  	ldf8		f33 = [up], 8
   120  	;;
   121  	ldf8		f46 = [rp], 8
   122  	xma.l		f39 = f7, f6, f8
   123  	ldf8		f34 = [up], 8
   124  	xma.hu		f43 = f7, f6, f8
   125  	;;
   126  	ldf8		f47 = [rp], 8
   127  	xma.l		f36 = f32, f6, f44
   128  	ldf8		f35 = [up], 8
   129  	xma.hu		f40 = f32, f6, f44
   130  	br.cloop.dptk	.grt5
   131  	;;
   132  
   133  	getf.sig	r27 = f39			C lo
   134  	xma.l		f37 = f33, f6, f45
   135  	ld8		r20 = [r9], 8
   136  	xma.hu		f41 = f33, f6, f45
   137  	;;
   138  	getf.sig	r31 = f43			C hi
   139  	getf.sig	r24 = f36			C lo
   140  	xma.l		f38 = f34, f6, f46
   141  	ld8		r21 = [r9], 8
   142  	xma.hu		f42 = f34, f6, f46
   143  	;;
   144  	getf.sig	r28 = f40			C hi
   145  	getf.sig	r25 = f37			C lo
   146  	xma.l		f39 = f35, f6, f47
   147  	ld8		r22 = [r9], 8
   148  	xma.hu		f43 = f35, f6, f47
   149  	;;
   150  	getf.sig	r29 = f41			C hi
   151  	getf.sig	r26 = f38			C lo
   152  	ld8		r23 = [r9], 8
   153  	br		.Lcj5
   154  
   155  .grt5:	ldf8		f44 = [rp], 8
   156  	ldf8		f32 = [up], 8
   157  	;;
   158  	getf.sig	r27 = f39			C lo
   159  	xma.l		f37 = f33, f6, f45
   160  	ld8		r20 = [r9], 8
   161  	xma.hu		f41 = f33, f6, f45
   162  	;;
   163  	ldf8		f45 = [rp], 8
   164  	getf.sig	r31 = f43			C hi
   165  	ldf8		f33 = [up], 8
   166  	;;
   167  	getf.sig	r24 = f36			C lo
   168  	xma.l		f38 = f34, f6, f46
   169  	ld8		r21 = [r9], 8
   170  	xma.hu		f42 = f34, f6, f46
   171  	;;
   172  	ldf8		f46 = [rp], 8
   173  	getf.sig	r28 = f40			C hi
   174  	ldf8		f34 = [up], 8
   175  	;;
   176  	getf.sig	r25 = f37			C lo
   177  	xma.l		f39 = f35, f6, f47
   178  	ld8		r22 = [r9], 8
   179  	xma.hu		f43 = f35, f6, f47
   180  	;;
   181  	ldf8		f47 = [rp], 8
   182  	getf.sig	r29 = f41			C hi
   183  	ldf8		f35 = [up], 8
   184  	;;
   185  	getf.sig	r26 = f38			C lo
   186  	xma.l		f36 = f32, f6, f44
   187  	ld8		r23 = [r9], 8
   188  	xma.hu		f40 = f32, f6, f44
   189  	br.cloop.dptk	.Loop
   190  	br		.Lend
   191  
   192  
        C Feed-in for n mod 4 = 2.  n = 2 joins the wind-down at .Lcj2 and
        C n = 6 at .Lcj6.  .grt6 handles larger n and enters the loop at .LL10.
   193  .Lb10:	ldf8		f47 = [rp], 8
   194  	ldf8		f35 = [up], 8
   195  	br.cloop.dptk	.grt2
   196  
   197  	xma.l		f38 = f7, f6, f8
   198  	xma.hu		f42 = f7, f6, f8
   199  	;;
   200  	xma.l		f39 = f35, f6, f47
   201  	xma.hu		f43 = f35, f6, f47
   202  	;;
   203  	getf.sig	r26 = f38			C lo
   204  	getf.sig	r30 = f42			C hi
   205  	ld8		r23 = [r9], 8
   206  	;;
   207  	getf.sig	r27 = f39			C lo
   208  	getf.sig	r31 = f43			C hi
   209  	ld8		r20 = [r9], 8
   210  	br		.Lcj2
   211  
   212  .grt2:	ldf8		f44 = [rp], 8
   213  	ldf8		f32 = [up], 8
   214  	;;
   215  	ldf8		f45 = [rp], 8
   216  	ldf8		f33 = [up], 8
   217  	xma.l		f38 = f7, f6, f8
   218  	xma.hu		f42 = f7, f6, f8
   219  	;;
   220  	ldf8		f46 = [rp], 8
   221  	ldf8		f34 = [up], 8
   222  	xma.l		f39 = f35, f6, f47
   223  	xma.hu		f43 = f35, f6, f47
   224  	;;
   225  	ldf8		f47 = [rp], 8
   226  	ldf8		f35 = [up], 8
   227  	;;
   228  	getf.sig	r26 = f38			C lo
   229  	xma.l		f36 = f32, f6, f44
   230  	ld8		r23 = [r9], 8
   231  	xma.hu		f40 = f32, f6, f44
   232  	br.cloop.dptk	.grt6
   233  
   234  	getf.sig	r30 = f42			C hi
   235  	;;
   236  	getf.sig	r27 = f39			C lo
   237  	xma.l		f37 = f33, f6, f45
   238  	ld8		r20 = [r9], 8
   239  	xma.hu		f41 = f33, f6, f45
   240  	;;
   241  	getf.sig	r31 = f43			C hi
   242  	getf.sig	r24 = f36			C lo
   243  	xma.l		f38 = f34, f6, f46
   244  	ld8		r21 = [r9], 8
   245  	xma.hu		f42 = f34, f6, f46
   246  	;;
   247  	getf.sig	r28 = f40			C hi
   248  	getf.sig	r25 = f37			C lo
   249  	xma.l		f39 = f35, f6, f47
   250  	ld8		r22 = [r9], 8
   251  	xma.hu		f43 = f35, f6, f47
   252  	br		.Lcj6
   253  
   254  .grt6:	ldf8		f44 = [rp], 8
   255  	getf.sig	r30 = f42			C hi
   256  	ldf8		f32 = [up], 8
   257  	;;
   258  	getf.sig	r27 = f39			C lo
   259  	xma.l		f37 = f33, f6, f45
   260  	ld8		r20 = [r9], 8
   261  	xma.hu		f41 = f33, f6, f45
   262  	;;
   263  	ldf8		f45 = [rp], 8
   264  	getf.sig	r31 = f43			C hi
   265  	ldf8		f33 = [up], 8
   266  	;;
   267  	getf.sig	r24 = f36			C lo
   268  	xma.l		f38 = f34, f6, f46
   269  	ld8		r21 = [r9], 8
   270  	xma.hu		f42 = f34, f6, f46
   271  	;;
   272  	ldf8		f46 = [rp], 8
   273  	getf.sig	r28 = f40			C hi
   274  	ldf8		f34 = [up], 8
   275  	;;
   276  	getf.sig	r25 = f37			C lo
   277  	xma.l		f39 = f35, f6, f47
   278  	ld8		r22 = [r9], 8
   279  	xma.hu		f43 = f35, f6, f47
   280  	br		.LL10
   281  
   282  
        C Feed-in for n mod 4 = 3.  n = 3 joins the wind-down at .Lcj3 and
        C n = 7 at .Lcj7.  .grt7 handles larger n and enters the loop at .LL11.
   283  .Lb11:	ldf8		f46 = [rp], 8
   284  	ldf8		f34 = [up], 8
   285  	;;
   286  	ldf8		f47 = [rp], 8
   287  	ldf8		f35 = [up], 8
   288  	br.cloop.dptk	.grt3
   289  
   290  	xma.l		f37 = f7, f6, f8
   291  	xma.hu		f41 = f7, f6, f8
   292  	;;
   293  	xma.l		f38 = f34, f6, f46
   294  	xma.hu		f42 = f34, f6, f46
   295  	;;
   296  	getf.sig	r25 = f37			C lo
   297  	xma.l		f39 = f35, f6, f47
   298  	xma.hu		f43 = f35, f6, f47
   299  	;;
   300  	getf.sig	r29 = f41			C hi
   301  	ld8		r22 = [r9], 8
   302  	;;
   303  	getf.sig	r26 = f38			C lo
   304  	getf.sig	r30 = f42			C hi
   305  	ld8		r23 = [r9], 8
   306  	;;
   307  	getf.sig	r27 = f39			C lo
   308  	getf.sig	r31 = f43			C hi
   309  	ld8		r20 = [r9], 8
   310  	br		.Lcj3
   311  
   312  .grt3:	ldf8		f44 = [rp], 8
   313  	xma.l		f37 = f7, f6, f8
   314  	ldf8		f32 = [up], 8
   315  	xma.hu		f41 = f7, f6, f8
   316  	;;
   317  	ldf8		f45 = [rp], 8
   318  	xma.l		f38 = f34, f6, f46
   319  	ldf8		f33 = [up], 8
   320  	xma.hu		f42 = f34, f6, f46
   321  	;;
   322  	ldf8		f46 = [rp], 8
   323  	ldf8		f34 = [up], 8
   324  	;;
   325  	getf.sig	r25 = f37			C lo
   326  	xma.l		f39 = f35, f6, f47
   327  	ld8		r22 = [r9], 8
   328  	xma.hu		f43 = f35, f6, f47
   329  	;;
   330  	ldf8		f47 = [rp], 8
   331  	getf.sig	r29 = f41			C hi
   332  	ldf8		f35 = [up], 8
   333  	;;
   334  	getf.sig	r26 = f38			C lo
   335  	xma.l		f36 = f32, f6, f44
   336  	ld8		r23 = [r9], 8
   337  	xma.hu		f40 = f32, f6, f44
   338  	br.cloop.dptk	.grt7
   339  	;;
   340  
   341  	getf.sig	r30 = f42			C hi
   342  	getf.sig	r27 = f39			C lo
   343  	xma.l		f37 = f33, f6, f45
   344  	ld8		r20 = [r9], 8
   345  	xma.hu		f41 = f33, f6, f45
   346  	;;
   347  	getf.sig	r31 = f43			C hi
   348  	getf.sig	r24 = f36			C lo
   349  	xma.l		f38 = f34, f6, f46
   350  	ld8		r21 = [r9], 8
   351  	xma.hu		f42 = f34, f6, f46
   352  	br		.Lcj7
   353  
   354  .grt7:	ldf8		f44 = [rp], 8
   355  	getf.sig	r30 = f42			C hi
   356  	ldf8		f32 = [up], 8
   357  	;;
   358  	getf.sig	r27 = f39			C lo
   359  	xma.l		f37 = f33, f6, f45
   360  	ld8		r20 = [r9], 8
   361  	xma.hu		f41 = f33, f6, f45
   362  	;;
   363  	ldf8		f45 = [rp], 8
   364  	getf.sig	r31 = f43			C hi
   365  	ldf8		f33 = [up], 8
   366  	;;
   367  	getf.sig	r24 = f36			C lo
   368  	xma.l		f38 = f34, f6, f46
   369  	ld8		r21 = [r9], 8
   370  	xma.hu		f42 = f34, f6, f46
   371  	br		.LL11
   372  
   373  
        C Feed-in for n mod 4 = 0.  n = 4 joins the wind-down at .Lcj4 and
        C n = 8 at .Lcj8.  .grt8 handles larger n and enters the loop at .LL00.
   374  .Lb00:	ldf8		f45 = [rp], 8
   375  	ldf8		f33 = [up], 8
   376  	;;
   377  	ldf8		f46 = [rp], 8
   378  	ldf8		f34 = [up], 8
   379  	;;
   380  	ldf8		f47 = [rp], 8
   381  	xma.l		f36 = f7, f6, f8
   382  	ldf8		f35 = [up], 8
   383  	xma.hu		f40 = f7, f6, f8
   384  	br.cloop.dptk	.grt4
   385  
   386  	xma.l		f37 = f33, f6, f45
   387  	xma.hu		f41 = f33, f6, f45
   388  	;;
   389  	getf.sig	r24 = f36			C lo
   390  	xma.l		f38 = f34, f6, f46
   391  	ld8		r21 = [r9], 8
   392  	xma.hu		f42 = f34, f6, f46
   393  	;;
   394  	getf.sig	r28 = f40			C hi
   395  	xma.l		f39 = f35, f6, f47
   396  	getf.sig	r25 = f37			C lo
   397  	ld8		r22 = [r9], 8
   398  	xma.hu		f43 = f35, f6, f47
   399  	;;
   400  	getf.sig	r29 = f41			C hi
   401  	getf.sig	r26 = f38			C lo
   402  	ld8		r23 = [r9], 8
   403  	;;
   404  	getf.sig	r30 = f42			C hi
   405  	getf.sig	r27 = f39			C lo
   406  	ld8		r20 = [r9], 8
   407  	br		.Lcj4
   408  
   409  .grt4:	ldf8		f44 = [rp], 8
   410  	xma.l		f37 = f33, f6, f45
   411  	ldf8		f32 = [up], 8
   412  	xma.hu		f41 = f33, f6, f45
   413  	;;
   414  	ldf8		f45 = [rp], 8
   415  	ldf8		f33 = [up], 8
   416  	xma.l		f38 = f34, f6, f46
   417  	getf.sig	r24 = f36			C lo
   418  	ld8		r21 = [r9], 8
   419  	xma.hu		f42 = f34, f6, f46
   420  	;;
   421  	ldf8		f46 = [rp], 8
   422  	getf.sig	r28 = f40			C hi
   423  	ldf8		f34 = [up], 8
   424  	xma.l		f39 = f35, f6, f47
   425  	getf.sig	r25 = f37			C lo
   426  	ld8		r22 = [r9], 8
   427  	xma.hu		f43 = f35, f6, f47
   428  	;;
   429  	ldf8		f47 = [rp], 8
   430  	getf.sig	r29 = f41			C hi
   431  	ldf8		f35 = [up], 8
   432  	;;
   433  	getf.sig	r26 = f38			C lo
   434  	xma.l		f36 = f32, f6, f44
   435  	ld8		r23 = [r9], 8
   436  	xma.hu		f40 = f32, f6, f44
   437  	br.cloop.dptk	.grt8
   438  	;;
   439  
   440  	getf.sig	r30 = f42			C hi
   441  	getf.sig	r27 = f39			C lo
   442  	xma.l		f37 = f33, f6, f45
   443  	ld8		r20 = [r9], 8
   444  	xma.hu		f41 = f33, f6, f45
   445  	br		.Lcj8
   446  
   447  .grt8:	ldf8		f44 = [rp], 8
   448  	getf.sig	r30 = f42			C hi
   449  	ldf8		f32 = [up], 8
   450  	;;
   451  	getf.sig	r27 = f39			C lo
   452  	xma.l		f37 = f33, f6, f45
   453  	ld8		r20 = [r9], 8
   454  	xma.hu		f41 = f33, f6, f45
   455  	br		.LL00
   456  
        C Main loop, unrolled four ways: one pass stores four result limbs.
        C Each quarter subtracts the borrow r8 from a low half, stores it via
        C r10, forms the next borrow from an up limb and a high half, and keeps
        C the loads, xma and getf.sig for later limbs in flight.  .LL00, .LL11
        C and .LL10 are the mid-loop entry points used by the feed-in code.
   457  	ALIGN(32)
   458  .Loop:
   459  {.mmi
   460  	ldf8		f44 = [rp], 8
   461  	cmp.ltu		p6, p0 = r27, r8	C lo cmp
   462  	sub		r14 = r27, r8		C lo sub
   463  }
   464  {.mmi
   465  	getf.sig	r30 = f42			C hi
   466  	ldf8		f32 = [up], 8
   467  	sub		r8 = r20, r31		C hi sub
   468  	;;				C 01
   469  }
   470  {.mmf
   471  	getf.sig	r27 = f39			C lo
   472  	st8		[r10] = r14, 8
   473  	xma.l		f37 = f33, f6, f45
   474  }
   475  {.mfi
   476  	ld8		r20 = [r9], 8
   477  	xma.hu		f41 = f33, f6, f45
   478     (p6)	add		r8 = 1, r8
   479  	;;				C 02
   480  }
   481  {.mmi
   482  .LL00:	ldf8		f45 = [rp], 8
   483  	cmp.ltu		p6, p0 = r24, r8
   484  	sub		r14 = r24, r8
   485  }
   486  {.mmi
   487  	getf.sig	r31 = f43			C hi
   488  	ldf8		f33 = [up], 8
   489  	sub		r8 = r21, r28
   490  	;;				C 03
   491  }
   492  {.mmf
   493  	getf.sig	r24 = f36			C lo
   494  	st8		[r10] = r14, 8
   495  	xma.l		f38 = f34, f6, f46
   496  }
   497  {.mfi
   498  	ld8		r21 = [r9], 8
   499  	xma.hu		f42 = f34, f6, f46
   500     (p6)	add		r8 = 1, r8
   501  	;;				C 04
   502  }
   503  {.mmi
   504  .LL11:	ldf8		f46 = [rp], 8
   505  	cmp.ltu		p6, p0 = r25, r8
   506  	sub		r14 = r25, r8
   507  }
   508  {.mmi
   509  	getf.sig	r28 = f40			C hi
   510  	ldf8		f34 = [up], 8
   511  	sub		r8 = r22, r29
   512  	;;				C 05
   513  }
   514  {.mmf
   515  	getf.sig	r25 = f37			C lo
   516  	st8		[r10] = r14, 8
   517  	xma.l		f39 = f35, f6, f47
   518  }
   519  {.mfi
   520  	ld8		r22 = [r9], 8
   521  	xma.hu		f43 = f35, f6, f47
   522     (p6)	add		r8 = 1, r8
   523  	;;				C 06
   524  }
   525  {.mmi
   526  .LL10:	ldf8		f47 = [rp], 8
   527  	cmp.ltu		p6, p0 = r26, r8
   528  	sub		r14 = r26, r8
   529  }
   530  {.mmi
   531  	getf.sig	r29 = f41			C hi
   532  	ldf8		f35 = [up], 8
   533  	sub		r8 = r23, r30
   534  	;;				C 07
   535  }
   536  {.mmf
   537  	getf.sig	r26 = f38			C lo
   538  	st8		[r10] = r14, 8
   539  	xma.l		f36 = f32, f6, f44
   540  }
   541  {.mfi
   542  	ld8		r23 = [r9], 8
   543  	xma.hu		f40 = f32, f6, f44
   544     (p6)	add		r8 = 1, r8
   545  }
   546  	br.cloop.dptk	.Loop
   547  	;;
   548  
        C Wind-down.  .Lend and .Lcj8 down to .Lcj1 form one fall-through chain
        C with one st8 per stage, so entering at .LcjK stores the last K limbs
        C and entering at .Lend stores the last nine.  No further ldf8 loads
        C are issued here, and the xma, getf.sig and ld8 work drops out stage
        C by stage as the remaining limbs are used up.
   549  .Lend:
   550  	cmp.ltu		p6, p0 = r27, r8
   551  	sub		r14 = r27, r8
   552  	getf.sig	r30 = f42
   553  	sub		r8 = r20, r31
   554  	;;
   555  	getf.sig	r27 = f39
   556  	st8		[r10] = r14, 8
   557  	xma.l		f37 = f33, f6, f45
   558  	ld8		r20 = [r9], 8
   559  	xma.hu		f41 = f33, f6, f45
   560     (p6)	add		r8 = 1, r8
   561  	;;
   562  .Lcj8:
   563  	cmp.ltu		p6, p0 = r24, r8
   564  	sub		r14 = r24, r8
   565  	getf.sig	r31 = f43
   566  	sub		r8 = r21, r28
   567  	;;
   568  	getf.sig	r24 = f36
   569  	st8		[r10] = r14, 8
   570  	xma.l		f38 = f34, f6, f46
   571  	ld8		r21 = [r9], 8
   572  	xma.hu		f42 = f34, f6, f46
   573     (p6)	add		r8 = 1, r8
   574  	;;
   575  .Lcj7:
   576  	cmp.ltu		p6, p0 = r25, r8
   577  	sub		r14 = r25, r8
   578  	getf.sig	r28 = f40
   579  	sub		r8 = r22, r29
   580  	;;
   581  	getf.sig	r25 = f37
   582  	st8		[r10] = r14, 8
   583  	xma.l		f39 = f35, f6, f47
   584  	ld8		r22 = [r9], 8
   585  	xma.hu		f43 = f35, f6, f47
   586     (p6)	add		r8 = 1, r8
   587  	;;
   588  .Lcj6:
   589  	cmp.ltu		p6, p0 = r26, r8
   590  	sub		r14 = r26, r8
   591  	getf.sig	r29 = f41
   592  	sub		r8 = r23, r30
   593  	;;
   594  	getf.sig	r26 = f38
   595  	st8		[r10] = r14, 8
   596  	ld8		r23 = [r9], 8
   597     (p6)	add		r8 = 1, r8
   598  	;;
   599  .Lcj5:
   600  	cmp.ltu		p6, p0 = r27, r8
   601  	sub		r14 = r27, r8
   602  	getf.sig	r30 = f42
   603  	sub		r8 = r20, r31
   604  	;;
   605  	getf.sig	r27 = f39
   606  	st8		[r10] = r14, 8
   607  	ld8		r20 = [r9], 8
   608     (p6)	add		r8 = 1, r8
   609  	;;
   610  .Lcj4:
   611  	cmp.ltu		p6, p0 = r24, r8
   612  	sub		r14 = r24, r8
   613  	getf.sig	r31 = f43
   614  	sub		r8 = r21, r28
   615  	;;
   616  	st8		[r10] = r14, 8
   617     (p6)	add		r8 = 1, r8
   618  	;;
   619  .Lcj3:
   620  	cmp.ltu		p6, p0 = r25, r8
   621  	sub		r14 = r25, r8
   622  	sub		r8 = r22, r29
   623  	;;
   624  	st8		[r10] = r14, 8
   625     (p6)	add		r8 = 1, r8
   626  	;;
   627  .Lcj2:
   628  	cmp.ltu		p6, p0 = r26, r8
   629  	sub		r14 = r26, r8
   630  	sub		r8 = r23, r30
   631  	;;
   632  	st8		[r10] = r14, 8
   633     (p6)	add		r8 = 1, r8
   634  	;;
        C Last limb: store it, restore ar.lc and return with the final borrow
        C limb in r8.
   635  .Lcj1:
   636  	cmp.ltu		p6, p0 = r27, r8
   637  	sub		r14 = r27, r8
   638  	sub		r8 = r20, r31
   639  	;;
   640  	st8		[r10] = r14, 8
   641  	mov		ar.lc = r2
   642     (p6)	add		r8 = 1, r8
   643  	br.ret.sptk.many b0
        C Exit for vl == 0: nothing has been stored and r8 is still 0.
   644  .Ldone:	mov		ar.lc = r2
   645  	br.ret.sptk.many b0
   646  EPILOGUE()
   647  ASM_END()