github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/ia64/addmul_1.asm (about)

     1  dnl  IA-64 mpn_addmul_1 -- Multiply a limb vector with a limb and add the
     2  dnl  result to a second limb vector.
     3  
     4  dnl  Contributed to the GNU project by Torbjorn Granlund.
     5  
     6  dnl  Copyright 2000-2005, 2007 Free Software Foundation, Inc.
     7  
     8  dnl  This file is part of the GNU MP Library.
     9  dnl
    10  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
    11  dnl  it under the terms of either:
    12  dnl
    13  dnl    * the GNU Lesser General Public License as published by the Free
    14  dnl      Software Foundation; either version 3 of the License, or (at your
    15  dnl      option) any later version.
    16  dnl
    17  dnl  or
    18  dnl
    19  dnl    * the GNU General Public License as published by the Free Software
    20  dnl      Foundation; either version 2 of the License, or (at your option) any
    21  dnl      later version.
    22  dnl
    23  dnl  or both in parallel, as here.
    24  dnl
    25  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    26  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    27  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    28  dnl  for more details.
    29  dnl
    30  dnl  You should have received copies of the GNU General Public License and the
    31  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    32  dnl  see https://www.gnu.org/licenses/.
    33  
    34  include(`../config.m4')
    35  
    36  C         cycles/limb
    37  C Itanium:    3.0
    38  C Itanium 2:  2.0
    39  
    40  C TODO
    41  C  * Further optimize feed-in and wind-down code, both for speed and code size.
    42  C  * Handle low limb input and results specially, using a common stf8 in the
    43  C    epilogue.
    44  C  * Use 1 c/l carry propagation scheme in wind-down code.
    45  C  * Use extra pointer registers for `up' and rp to speed up feed-in loads.
    46  C  * Work out final differences with mul_1.asm.  That function is 300 bytes
    47  C    smaller than this due to better loop scheduling and thus simpler feed-in
    48  C    code.
    49  
    50  C INPUT PARAMETERS
    51  define(`rp', `r32')		C result vector: loaded from here, stored back through r20
    52  define(`up', `r33')		C source vector: only loaded from
    53  define(`n', `r34')		C limb count; selects the feed-in path and the loop count
    54  define(`vl', `r35')		C multiplier limb, moved to f6 with setf.sig
    55  
    56  ASM_START()
        C mpn_addmul_1 (rp, up, n, vl)
        C
        C Multiply the n-limb vector at up by the limb vl and add the products to
        C the n-limb vector at rp.  On return r8 holds the most significant limb,
        C i.e. the last high product plus the final carry.
        C
        C Registers set up here and used by all later sections:
        C   f6      vl, moved to an FP register for the xma multiply-adds
        C   f7, f8  up[0] and rp[0]
        C   r20     store pointer, a copy of rp (rp itself advances with the loads)
        C   r2      entry value of ar.lc, written back before each return
        C   ar.lc   (n-1) >> 2, counted down by the br.cloop instructions
        C   p6-p9   carry predicates, preset to "no carry" (p6, p8 false)
    57  PROLOGUE(mpn_addmul_1)
    58  	.prologue
    59  	.save	ar.lc, r2
    60  	.body
    61  
        C With the 32-bit ABI, widen the pointer arguments and zero-extend n.
    62  ifdef(`HAVE_ABI_32',
    63  `	addp4		rp = 0, rp		C M I
    64  	addp4		up = 0, up		C M I
    65  	zxt4		n = n			C I
    66  	;;
    67  ')
        C r15 = n-1; keep rp in r20 for the stores; save ar.lc in r2.
    68  {.mmi
    69  	adds		r15 = -1, n		C M I
    70  	mov		r20 = rp		C M I
    71  	mov.i		r2 = ar.lc		C I0
    72  }
        C Preload up[0] and rp[0], post-incrementing both pointers; r14 = n mod 4.
    73  {.mmi
    74  	ldf8		f7 = [up], 8		C M
    75  	ldf8		f8 = [rp], 8		C M
    76  	and		r14 = 3, n		C M I
    77  	;;
    78  }
        C f6 = vl; p10 = (n mod 4 == 0); r31 = (n-1) >> 2.
    79  {.mmi
    80  	setf.sig	f6 = vl			C M2 M3
    81  	cmp.eq		p10, p0 = 0, r14	C M I
    82  	shr.u		r31 = r15, 2		C I0
    83  }
        C p11 = (n mod 4 == 2); p12 = (n mod 4 == 3).
    84  {.mmi
    85  	cmp.eq		p11, p0 = 2, r14	C M I
    86  	cmp.eq		p12, p0 = 3, r14	C M I
    87  	nop.i		0			C I
    88  	;;
    89  }
        C r0 != r0 is never true, so p6 = p8 = false and p7 = p9 = true: no
        C carry-in for the first limb.  ar.lc receives the loop count from r31.
    90  {.mii
    91  	cmp.ne		p6, p7 = r0, r0		C M I
    92  	mov.i		ar.lc = r31		C I0
    93  	cmp.ne		p8, p9 = r0, r0		C M I
    94  }
        C Dispatch on n mod 4; n mod 4 == 1 falls through to .Lb01.
    95  {.bbb
    96    (p10)	br.dptk		.Lb00			C B
    97    (p11)	br.dptk		.Lb10			C B
    98    (p12)	br.dptk		.Lb11			C B
    99  	;;
   100  }
   101  
   102  .Lb01:	br.cloop.dptk	.grt1			C B
        C Feed-in for n mod 4 == 1.  The br.cloop above falls through only when
        C ar.lc is zero, that is for n == 1; otherwise it decrements ar.lc and
        C branches.
   103  
        C n == 1: a single multiply-add; store the low limb and return with the
        C high limb in r8.
   104  	xma.l		f39 = f7, f6, f8	C F
   105  	xma.hu		f43 = f7, f6, f8	C F
   106  	;;
   107  	getf.sig	r8 = f43		C M2
   108  	stf8		[r20] = f39		C M2 M3
   109  	mov.i		ar.lc = r2		C I0
   110  	br.ret.sptk.many b0			C B
   111  
   112  .grt1:
        C n >= 5: load the next four limb pairs and form the first product
        C (f39 low, f43 high) from the preloaded f7 and f8.
   113  	ldf8		f32 = [up], 8
   114  	ldf8		f44 = [rp], 8
   115  	;;
   116  	ldf8		f33 = [up], 8
   117  	ldf8		f45 = [rp], 8
   118  	;;
   119  	ldf8		f34 = [up], 8
   120  	xma.l		f39 = f7, f6, f8
   121  	ldf8		f46 = [rp], 8
   122  	xma.hu		f43 = f7, f6, f8
   123  	;;
   124  	ldf8		f35 = [up], 8
   125  	ldf8		f47 = [rp], 8
   126  	br.cloop.dptk	.grt5			C taken when n >= 9
   127  
        C n == 5: form the remaining four products, store the lowest result limb
        C directly from f39 and join the wind-down at .Lcj5.
   128  	xma.l		f36 = f32, f6, f44
   129  	xma.hu		f40 = f32, f6, f44
   130  	;;
   131  	stf8		[r20] = f39, 8
   132  	xma.l		f37 = f33, f6, f45
   133  	xma.hu		f41 = f33, f6, f45
   134  	;;
   135  	getf.sig	r31 = f43
   136  	getf.sig	r24 = f36
   137  	xma.l		f38 = f34, f6, f46
   138  	xma.hu		f42 = f34, f6, f46
   139  	;;
   140  	getf.sig	r28 = f40
   141  	getf.sig	r25 = f37
   142  	xma.l		f39 = f35, f6, f47
   143  	xma.hu		f43 = f35, f6, f47
   144  	;;
   145  	getf.sig	r29 = f41
   146  	getf.sig	r26 = f38
   147  	br		.Lcj5
   148  
   149  .grt5:
        C n >= 9: r30 = 0 stands in for the high limb that does not exist below
        C the first product, so the first add at .Loop or .Le0 (r30 + r27) yields
        C the low limb of the first product unchanged.
   150  	mov		r30 = 0
   151  	xma.l		f36 = f32, f6, f44
   152  	xma.hu		f40 = f32, f6, f44
   153  	;;
   154  	ldf8		f32 = [up], 8
   155  	xma.l		f37 = f33, f6, f45
   156  	ldf8		f44 = [rp], 8
   157  	xma.hu		f41 = f33, f6, f45
   158  	;;
   159  	ldf8		f33 = [up], 8
   160  	getf.sig	r27 = f39
   161  	;;
   162  	getf.sig	r31 = f43
   163  	xma.l		f38 = f34, f6, f46
   164  	ldf8		f45 = [rp], 8
   165  	xma.hu		f42 = f34, f6, f46
   166  	;;
   167  	ldf8		f34 = [up], 8
   168  	getf.sig	r24 = f36
   169  	;;
   170  	getf.sig	r28 = f40
   171  	xma.l		f39 = f35, f6, f47
   172  	ldf8		f46 = [rp], 8
   173  	xma.hu		f43 = f35, f6, f47
   174  	;;
   175  	ldf8		f35 = [up], 8
   176  	getf.sig	r25 = f37
   177  	br.cloop.dptk	.Loop			C taken when n >= 13
   178  	br		.Le0			C n == 9: straight to the wind-down
   179  
   180  
   181  .Lb10:	ldf8		f35 = [up], 8
        C Feed-in for n mod 4 == 2.  The load above and the one below fetch the
        C second limb pair (the first pair is already in f7 and f8).
   182  	ldf8		f47 = [rp], 8
   183  	br.cloop.dptk	.grt2			C taken unless n == 2
   184  
        C n == 2: form both products, store the lowest result limb directly from
        C f38 and join the wind-down at .Lcj2 with r30 = high limb of the first
        C product, r27 = low limb and r8 = high limb of the second.
   185  	xma.l		f38 = f7, f6, f8
   186  	xma.hu		f42 = f7, f6, f8
   187  	;;
   188  	xma.l		f39 = f35, f6, f47
   189  	xma.hu		f43 = f35, f6, f47
   190  	;;
   191  	getf.sig	r30 = f42
   192  	stf8		[r20] = f38, 8
   193  	getf.sig	r27 = f39
   194  	getf.sig	r8 = f43
   195  	br		.Lcj2
   196  
   197  .grt2:
        C n >= 6: load four more limb pairs while the first two products are
        C formed.
   198  	ldf8		f32 = [up], 8
   199  	ldf8		f44 = [rp], 8
   200  	;;
   201  	ldf8		f33 = [up], 8
   202  	xma.l		f38 = f7, f6, f8
   203  	ldf8		f45 = [rp], 8
   204  	xma.hu		f42 = f7, f6, f8
   205  	;;
   206  	ldf8		f34 = [up], 8
   207  	xma.l		f39 = f35, f6, f47
   208  	ldf8		f46 = [rp], 8
   209  	xma.hu		f43 = f35, f6, f47
   210  	;;
   211  	ldf8		f35 = [up], 8
   212  	ldf8		f47 = [rp], 8
   213  	br.cloop.dptk	.grt6			C taken when n >= 10
   214  
        C n == 6: form the remaining four products, store the lowest result limb
        C directly from f38 and join the wind-down at .Lcj6.
   215  	stf8		[r20] = f38, 8
   216  	xma.l		f36 = f32, f6, f44
   217  	xma.hu		f40 = f32, f6, f44
   218  	;;
   219  	getf.sig	r30 = f42
   220  	getf.sig	r27 = f39
   221  	xma.l		f37 = f33, f6, f45
   222  	xma.hu		f41 = f33, f6, f45
   223  	;;
   224  	getf.sig	r31 = f43
   225  	getf.sig	r24 = f36
   226  	xma.l		f38 = f34, f6, f46
   227  	xma.hu		f42 = f34, f6, f46
   228  	;;
   229  	getf.sig	r28 = f40
   230  	getf.sig	r25 = f37
   231  	xma.l		f39 = f35, f6, f47
   232  	xma.hu		f43 = f35, f6, f47
   233  	br		.Lcj6
   234  
   235  .grt6:
        C n >= 10: r29 = 0 stands in for the high limb that does not exist below
        C the first product.  The loop is entered at its last stage, .LL10, whose
        C add (r29 + r26) then yields the low limb of the first product unchanged.
   236  	mov		r29 = 0
   237  	xma.l		f36 = f32, f6, f44
   238  	xma.hu		f40 = f32, f6, f44
   239  	;;
   240  	ldf8		f32 = [up], 8
   241  	getf.sig	r26 = f38
   242  	;;
   243  	getf.sig	r30 = f42
   244  	xma.l		f37 = f33, f6, f45
   245  	ldf8		f44 = [rp], 8
   246  	xma.hu		f41 = f33, f6, f45
   247  	;;
   248  	ldf8		f33 = [up], 8
   249  	getf.sig	r27 = f39
   250  	;;
   251  	getf.sig	r31 = f43
   252  	xma.l		f38 = f34, f6, f46
   253  	ldf8		f45 = [rp], 8
   254  	xma.hu		f42 = f34, f6, f46
   255  	;;
   256  	ldf8		f34 = [up], 8
   257  	getf.sig	r24 = f36
   258  	br		.LL10
   259  
   260  
   261  .Lb11:	ldf8		f34 = [up], 8
        C Feed-in for n mod 4 == 3.  The loads here fetch the second and third
        C limb pairs (the first pair is already in f7 and f8).
   262  	ldf8		f46 = [rp], 8
   263  	;;
   264  	ldf8		f35 = [up], 8
   265  	ldf8		f47 = [rp], 8
   266  	br.cloop.dptk	.grt3			C taken unless n == 3
   267  	;;
   268  
        C n == 3: form all three products, store the lowest result limb directly
        C from f37 and join the wind-down at .Lcj3 with r8 = high limb of the last
        C product.
   269  	xma.l		f37 = f7, f6, f8
   270  	xma.hu		f41 = f7, f6, f8
   271  	xma.l		f38 = f34, f6, f46
   272  	xma.hu		f42 = f34, f6, f46
   273  	xma.l		f39 = f35, f6, f47
   274  	xma.hu		f43 = f35, f6, f47
   275  	;;
   276  	getf.sig	r29 = f41
   277  	stf8		[r20] = f37, 8
   278  	getf.sig	r26 = f38
   279  	getf.sig	r30 = f42
   280  	getf.sig	r27 = f39
   281  	getf.sig	r8 = f43
   282  	br		.Lcj3
   283  
   284  .grt3:
        C n >= 7: load four more limb pairs while the first three products are
        C formed.
   285  	ldf8		f32 = [up], 8
   286  	xma.l		f37 = f7, f6, f8
   287  	ldf8		f44 = [rp], 8
   288  	xma.hu		f41 = f7, f6, f8
   289  	;;
   290  	ldf8		f33 = [up], 8
   291  	xma.l		f38 = f34, f6, f46
   292  	ldf8		f45 = [rp], 8
   293  	xma.hu		f42 = f34, f6, f46
   294  	;;
   295  	ldf8		f34 = [up], 8
   296  	xma.l		f39 = f35, f6, f47
   297  	ldf8		f46 = [rp], 8
   298  	xma.hu		f43 = f35, f6, f47
   299  	;;
   300  	ldf8		f35 = [up], 8
   301  	getf.sig	r25 = f37		C FIXME
   302  	ldf8		f47 = [rp], 8
   303  	br.cloop.dptk	.grt7			C taken when n >= 11
   304  
        C n == 7: store the lowest result limb directly from f37, form the
        C remaining products and join the wind-down at .Lcj7.
   305  	getf.sig	r29 = f41
   306  	stf8		[r20] = f37, 8		C FIXME
   307  	xma.l		f36 = f32, f6, f44
   308  	getf.sig	r26 = f38
   309  	xma.hu		f40 = f32, f6, f44
   310  	;;
   311  	getf.sig	r30 = f42
   312  	xma.l		f37 = f33, f6, f45
   313  	getf.sig	r27 = f39
   314  	xma.hu		f41 = f33, f6, f45
   315  	;;
   316  	getf.sig	r31 = f43
   317  	xma.l		f38 = f34, f6, f46
   318  	getf.sig	r24 = f36
   319  	xma.hu		f42 = f34, f6, f46
   320  	br		.Lcj7
   321  
   322  .grt7:
        C n >= 11: r28 = 0 stands in for the high limb that does not exist below
        C the first product.  The loop is entered at .LL11, whose add (r28 + r25)
        C then yields the low limb of the first product, fetched into r25 in .grt3.
   323  	getf.sig	r29 = f41
   324  	xma.l		f36 = f32, f6, f44
   325  	mov		r28 = 0
   326  	xma.hu		f40 = f32, f6, f44
   327  	;;
   328  	ldf8		f32 = [up], 8
   329  	getf.sig	r26 = f38
   330  	;;
   331  	getf.sig	r30 = f42
   332  	xma.l		f37 = f33, f6, f45
   333  	ldf8		f44 = [rp], 8
   334  	xma.hu		f41 = f33, f6, f45
   335  	;;
   336  	ldf8		f33 = [up], 8
   337  	getf.sig	r27 = f39
   338  	br		.LL11
   339  
   340  
   341  .Lb00:	ldf8		f33 = [up], 8
        C Feed-in for n mod 4 == 0.  The loads here fetch the second to fourth
        C limb pairs while the first product is formed from f7 and f8.
   342  	ldf8		f45 = [rp], 8
   343  	;;
   344  	ldf8		f34 = [up], 8
   345  	ldf8		f46 = [rp], 8
   346  	;;
   347  	ldf8		f35 = [up], 8
   348  	xma.l		f36 = f7, f6, f8
   349  	ldf8		f47 = [rp], 8
   350  	xma.hu		f40 = f7, f6, f8
   351  	br.cloop.dptk	.grt4			C taken unless n == 4
   352  
        C n == 4: form the remaining three products, store the lowest result limb
        C directly from f36 and join the wind-down at .Lcj4.
   353  	xma.l		f37 = f33, f6, f45
   354  	xma.hu		f41 = f33, f6, f45
   355  	xma.l		f38 = f34, f6, f46
   356  	xma.hu		f42 = f34, f6, f46
   357  	;;
   358  	getf.sig	r28 = f40
   359  	stf8		[r20] = f36, 8
   360  	xma.l		f39 = f35, f6, f47
   361  	getf.sig	r25 = f37
   362  	xma.hu		f43 = f35, f6, f47
   363  	;;
   364  	getf.sig	r29 = f41
   365  	getf.sig	r26 = f38
   366  	getf.sig	r30 = f42
   367  	getf.sig	r27 = f39
   368  	br		.Lcj4
   369  
   370  .grt4:
        C n >= 8: load four more limb pairs while products two to four are formed.
   371  	ldf8		f32 = [up], 8
   372  	xma.l		f37 = f33, f6, f45
   373  	ldf8		f44 = [rp], 8
   374  	xma.hu		f41 = f33, f6, f45
   375  	;;
   376  	ldf8		f33 = [up], 8
   377  	xma.l		f38 = f34, f6, f46
   378  	ldf8		f45 = [rp], 8
   379  	xma.hu		f42 = f34, f6, f46
   380  	;;
   381  	ldf8		f34 = [up], 8
   382  	getf.sig	r24 = f36		C FIXME
   383  	xma.l		f39 = f35, f6, f47
   384  	ldf8		f46 = [rp], 8
   385  	getf.sig	r28 = f40
   386  	xma.hu		f43 = f35, f6, f47
   387  	;;
   388  	ldf8		f35 = [up], 8
   389  	getf.sig	r25 = f37
   390  	ldf8		f47 = [rp], 8
   391  	br.cloop.dptk	.grt8			C taken when n >= 12
   392  
        C n == 8: store the lowest result limb directly from f36, form the next
        C products and join the wind-down at .Lcj8.
   393  	getf.sig	r29 = f41
   394  	stf8		[r20] = f36, 8		C FIXME
   395  	xma.l		f36 = f32, f6, f44
   396  	getf.sig	r26 = f38
   397  	getf.sig	r30 = f42
   398  	xma.hu		f40 = f32, f6, f44
   399  	;;
   400  	xma.l		f37 = f33, f6, f45
   401  	getf.sig	r27 = f39
   402  	xma.hu		f41 = f33, f6, f45
   403  	br		.Lcj8
   404  
   405  .grt8:
        C n >= 12: r31 = 0 stands in for the high limb that does not exist below
        C the first product.  The loop is entered at .LL00, whose add (r31 + r24)
        C then yields the low limb of the first product, fetched into r24 in .grt4.
   406  	getf.sig	r29 = f41
   407  	xma.l		f36 = f32, f6, f44
   408  	mov		r31 = 0
   409  	xma.hu		f40 = f32, f6, f44
   410  	;;
   411  	ldf8		f32 = [up], 8
   412  	getf.sig	r26 = f38
   413  	br		.LL00
   414  
   415  
   416  C *** MAIN LOOP START ***
        C Four limbs are retired per iteration, one in each of the stages starting
        C at .Loop, .LL00, .LL11 and .LL10; the last three labels are also entry
        C points for the feed-in code.  In every stage:
        C   - getf.sig moves earlier product limbs to integer registers,
        C   - xma.l and xma.hu start the low and high halves of up[i]*vl + rp[i]
        C     for a later limb, and the ldf8s fetch operands for later products,
        C   - the predicated add pair forms high(previous product) + low(current
        C     product), plus 1 when the carry-in predicate is set,
        C   - the predicated cmp pair derives the carry-out: with carry-in the
        C     sum wrapped iff sum <= low limb, without carry-in iff sum < low limb,
        C   - st8 stores the finished limb through r20.
        C The carry alternates between the predicate pairs p6/p7 and p8/p9.
        C The "fed by" column appears to list the insn numbers whose results an
        C insn consumes; i1 and i2 presumably stand for Itanium and Itanium 2.
   417  	ALIGN(32)				C insn	fed	cycle #
   418  .Loop:
   419  	.pred.rel "mutex", p6, p7		C num	by	i1 i2
   420  	getf.sig	r29 = f41		C 00	16	0   0
   421  	xma.l		f36 = f32, f6, f44	C 01	06,15	0   0
   422     (p6)	add		r14 = r30, r27, 1	C 02		0   0
   423  	ldf8		f47 = [rp], 8		C 03		0   0
   424  	xma.hu		f40 = f32, f6, f44	C 04	06,15	0   0
   425     (p7)	add		r14 = r30, r27		C 05		0   0
   426  	;;
   427  	.pred.rel "mutex", p6, p7
   428  	ldf8		f32 = [up], 8		C 06		1   1
   429     (p6)	cmp.leu		p8, p9 = r14, r27	C 07		1   1
   430     (p7)	cmp.ltu		p8, p9 = r14, r27	C 08		1   1
   431  	getf.sig	r26 = f38		C 09	25	2   1
   432  	st8		[r20] = r14, 8		C 10		2   1
   433  	nop.b		0			C 11		2   1
   434  	;;
   435  .LL00:
   436  	.pred.rel "mutex", p8, p9
   437  	getf.sig	r30 = f42		C 12	28	3   2
   438  	xma.l		f37 = f33, f6, f45	C 13	18,27	3   2
   439     (p8)	add		r16 = r31, r24, 1	C 14		3   2
   440  	ldf8		f44 = [rp], 8		C 15		3   2
   441  	xma.hu		f41 = f33, f6, f45	C 16	18,27	3   2
   442     (p9)	add		r16 = r31, r24		C 17		3   2
   443  	;;
   444  	.pred.rel "mutex", p8, p9
   445  	ldf8		f33 = [up], 8		C 18		4   3
   446     (p8)	cmp.leu		p6, p7 = r16, r24	C 19		4   3
   447     (p9)	cmp.ltu		p6, p7 = r16, r24	C 20		4   3
   448  	getf.sig	r27 = f39		C 21	37	5   3
   449  	st8		[r20] = r16, 8		C 22		5   3
   450  	nop.b		0			C 23		5   3
   451  	;;
   452  .LL11:
   453  	.pred.rel "mutex", p6, p7
   454  	getf.sig	r31 = f43		C 24	40	6   4
   455  	xma.l		f38 = f34, f6, f46	C 25	30,39	6   4
   456     (p6)	add		r14 = r28, r25, 1	C 26		6   4
   457  	ldf8		f45 = [rp], 8		C 27		6   4
   458  	xma.hu		f42 = f34, f6, f46	C 28	30,39	6   4
   459     (p7)	add		r14 = r28, r25		C 29		6   4
   460  	;;
   461  	.pred.rel "mutex", p6, p7
   462  	ldf8		f34 = [up], 8		C 30		7   5
   463     (p6)	cmp.leu		p8, p9 = r14, r25	C 31		7   5
   464     (p7)	cmp.ltu		p8, p9 = r14, r25	C 32		7   5
   465  	getf.sig	r24 = f36		C 33	01	8   5
   466  	st8		[r20] = r14, 8		C 34		8   5
   467  	nop.b		0			C 35		8   5
   468  	;;
   469  .LL10:
   470  	.pred.rel "mutex", p8, p9
   471  	getf.sig	r28 = f40		C 36	04	9   6
   472  	xma.l		f39 = f35, f6, f47	C 37	42,03	9   6
   473     (p8)	add		r16 = r29, r26, 1	C 38		9   6
   474  	ldf8		f46 = [rp], 8		C 39		9   6
   475  	xma.hu		f43 = f35, f6, f47	C 40	42,03	9   6
   476     (p9)	add		r16 = r29, r26		C 41		9   6
   477  	;;
   478  	.pred.rel "mutex", p8, p9
   479  	ldf8		f35 = [up], 8		C 42	       10   7
   480     (p8)	cmp.leu		p6, p7 = r16, r26	C 43	       10   7
   481     (p9)	cmp.ltu		p6, p7 = r16, r26	C 44	       10   7
   482  	getf.sig	r25 = f37		C 45	13     11   7
   483  	st8		[r20] = r16, 8		C 46	       11   7
   484  	br.cloop.dptk	.Loop			C 47	       11   7
   485  C *** MAIN LOOP END ***
   486  	;;
   487  .Le0:
        C Wind-down.  .Le0 repeats the first two loop stages without the loads
        C from up (one last rp limb is still fetched); the stages that follow
        C issue fewer and fewer multiply-adds until only the carry chain is left.
        C
        C .LcjK is where the feed-in code for n == K joins; from that label on,
        C K-1 limbs remain to be stored.  Each stage adds the high limb of the
        C previous product to the low limb of the current one plus the carry-in
        C predicate, and derives the next carry with the cmp.leu/cmp.ltu pair.
   488  	.pred.rel "mutex", p6, p7
   489  	getf.sig	r29 = f41		C
   490  	xma.l		f36 = f32, f6, f44	C
   491     (p6)	add		r14 = r30, r27, 1	C
   492  	ldf8		f47 = [rp], 8		C
   493  	xma.hu		f40 = f32, f6, f44	C
   494     (p7)	add		r14 = r30, r27		C
   495  	;;
   496  	.pred.rel "mutex", p6, p7
   497     (p6)	cmp.leu		p8, p9 = r14, r27	C
   498     (p7)	cmp.ltu		p8, p9 = r14, r27	C
   499  	getf.sig	r26 = f38		C
   500  	st8		[r20] = r14, 8		C
   501  	;;
   502  	.pred.rel "mutex", p8, p9
   503  	getf.sig	r30 = f42		C
   504  	xma.l		f37 = f33, f6, f45	C
   505     (p8)	add		r16 = r31, r24, 1	C
   506  	xma.hu		f41 = f33, f6, f45	C
   507     (p9)	add		r16 = r31, r24		C
   508  	;;
   509  	.pred.rel "mutex", p8, p9
   510     (p8)	cmp.leu		p6, p7 = r16, r24	C
   511     (p9)	cmp.ltu		p6, p7 = r16, r24	C
   512  	getf.sig	r27 = f39		C
   513  	st8		[r20] = r16, 8		C
   514  	;;
   515  .Lcj8:
   516  	.pred.rel "mutex", p6, p7
   517  	getf.sig	r31 = f43		C
   518  	xma.l		f38 = f34, f6, f46	C
   519     (p6)	add		r14 = r28, r25, 1	C
   520  	xma.hu		f42 = f34, f6, f46	C
   521     (p7)	add		r14 = r28, r25		C
   522  	;;
   523  	.pred.rel "mutex", p6, p7
   524     (p6)	cmp.leu		p8, p9 = r14, r25	C
   525     (p7)	cmp.ltu		p8, p9 = r14, r25	C
   526  	getf.sig	r24 = f36		C
   527  	st8		[r20] = r14, 8		C
   528  	;;
   529  .Lcj7:
   530  	.pred.rel "mutex", p8, p9
   531  	getf.sig	r28 = f40		C
   532  	xma.l		f39 = f35, f6, f47	C last multiply-add (low half)
   533     (p8)	add		r16 = r29, r26, 1	C
   534  	xma.hu		f43 = f35, f6, f47	C last multiply-add (high half)
   535     (p9)	add		r16 = r29, r26		C
   536  	;;
   537  	.pred.rel "mutex", p8, p9
   538     (p8)	cmp.leu		p6, p7 = r16, r26	C
   539     (p9)	cmp.ltu		p6, p7 = r16, r26	C
   540  	getf.sig	r25 = f37		C
   541  	st8		[r20] = r16, 8		C
   542  	;;
   543  .Lcj6:
   544  	.pred.rel "mutex", p6, p7
   545  	getf.sig	r29 = f41		C
   546     (p6)	add		r14 = r30, r27, 1	C
   547     (p7)	add		r14 = r30, r27		C
   548  	;;
   549  	.pred.rel "mutex", p6, p7
   550     (p6)	cmp.leu		p8, p9 = r14, r27	C
   551     (p7)	cmp.ltu		p8, p9 = r14, r27	C
   552  	getf.sig	r26 = f38		C
   553  	st8		[r20] = r14, 8		C
   554  	;;
   555  .Lcj5:
   556  	.pred.rel "mutex", p8, p9
   557  	getf.sig	r30 = f42		C
   558     (p8)	add		r16 = r31, r24, 1	C
   559     (p9)	add		r16 = r31, r24		C
   560  	;;
   561  	.pred.rel "mutex", p8, p9
   562     (p8)	cmp.leu		p6, p7 = r16, r24	C
   563     (p9)	cmp.ltu		p6, p7 = r16, r24	C
   564  	getf.sig	r27 = f39		C
   565  	st8		[r20] = r16, 8		C
   566  	;;
   567  .Lcj4:
   568  	.pred.rel "mutex", p6, p7
   569  	getf.sig	r8 = f43		C high limb of the last product
   570     (p6)	add		r14 = r28, r25, 1	C
   571     (p7)	add		r14 = r28, r25		C
   572  	;;
   573  	.pred.rel "mutex", p6, p7
   574  	st8		[r20] = r14, 8		C
   575     (p6)	cmp.leu		p8, p9 = r14, r25	C
   576     (p7)	cmp.ltu		p8, p9 = r14, r25	C
   577  	;;
   578  .Lcj3:
   579  	.pred.rel "mutex", p8, p9
   580     (p8)	add		r16 = r29, r26, 1	C
   581     (p9)	add		r16 = r29, r26		C
   582  	;;
   583  	.pred.rel "mutex", p8, p9
   584  	st8		[r20] = r16, 8		C
   585     (p8)	cmp.leu		p6, p7 = r16, r26	C
   586     (p9)	cmp.ltu		p6, p7 = r16, r26	C
   587  	;;
   588  .Lcj2:
   589  	.pred.rel "mutex", p6, p7
   590     (p6)	add		r14 = r30, r27, 1	C
   591     (p7)	add		r14 = r30, r27		C
   592  	;;
   593  	.pred.rel "mutex", p6, p7
   594  	st8		[r20] = r14		C last limb, no post-increment
   595     (p6)	cmp.leu		p8, p9 = r14, r27	C
   596     (p7)	cmp.ltu		p8, p9 = r14, r27	C
   597  	;;
        C Fold the final carry into r8, restore ar.lc from r2 and return.
   598     (p8)	add		r8 = 1, r8		C M I
   599  	mov.i		ar.lc = r2		C I0
   600  	br.ret.sptk.many b0			C B
   601  EPILOGUE()
   602  ASM_END()