github.com/sbinet/go@v0.0.0-20160827155028-54d7de7dd62b/src/math/big/arith_s390x.s (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build !math_big_pure_go,s390x
     6  
     7  #include "textflag.h"
     8  
     9  // This file provides fast assembly versions for the elementary
    10  // arithmetic operations on vectors implemented in arith.go.
    11  
    12  TEXT ·mulWW(SB),NOSPLIT,$0	// func mulWW(x, y Word) (z1, z0 Word): z1:z0 = full 128-bit product x*y
    13  	MOVD x+0(FP), R3	// R3 = x
    14  	MOVD y+8(FP), R4	// R4 = y
    15  	MULHDU R3, R4	// unsigned multiply; the stores below take the high word from R10 and the low word from R11, so this relies on the assembler's MULHDU expansion leaving the product in that pair (NOTE(review): confirm against cmd/internal/obj/s390x)
    16  	MOVD R10, z1+16(FP)	// z1 = high 64 bits
    17  	MOVD R11, z0+24(FP)	// z0 = low 64 bits
    18  	RET
    19  
    20  // func divWW(x1, x0, y Word) (q, r Word) -- divides the 128-bit value x1:x0 by y, returning quotient q and remainder r
    21  TEXT ·divWW(SB),NOSPLIT,$0
    22  	MOVD  x1+0(FP), R10	// R10 = high word of the dividend
    23  	MOVD  x0+8(FP), R11	// R11 = low word of the dividend
    24  	MOVD  y+16(FP), R5	// R5 = divisor
    25  	WORD  $0xb98700a5 // dlgr r10,r5: hand-encoded divide of the R10:R11 pair by R5; the registers are fixed by the encoding
    26  	MOVD  R11, q+24(FP)	// quotient is taken from R11
    27  	MOVD  R10, r+32(FP)	// remainder is taken from R10
    28  	RET
    29  
    30  // Register map, written as <x86-style name> = <s390x register> (the left-hand names look like those of an amd64 version this was ported from -- TODO confirm): DI = R3 (n), CX = R4 (c), SI = r10 (byte offset i), r8 = r8 (x), r9=r9 (y), r10 = r2 (z), r11 = r5, r12 = r6, r13 = r7, r14 = r1 (word temporaries) (R0 set to 0) + use R11 as a further temporary
    31  // func addVV(z, x, y []Word) (c Word) -- z[i] = x[i] + y[i] + carry for 0 <= i < len(z); c is the carry out of the last word (0 or 1)
    32  TEXT ·addVV(SB),NOSPLIT,$0
    33  	MOVD z_len+8(FP), R3	// n = len(z)
    34  	MOVD x+24(FP), R8	// R8 = &x[0]
    35  	MOVD y+48(FP), R9	// R9 = &y[0]
    36  	MOVD z+0(FP), R2	// R2 = &z[0]
    37  
    38  	MOVD $0, R4		// c = 0
    39  	MOVD $0, R0		// make sure it's zero
    40  	MOVD $0, R10		// i = 0 (R10 is a byte offset: 8 per word)
    41  
    42  	// replace the BLT below with BR to disable the unrolled loop
    43  	SUB  $4, R3		// n -= 4
    44  	BLT v1			// if n < 0 goto v1
    45  
    46  U1:	// n >= 0: at least 4 words remain
    47  	// regular loop body unrolled 4x
    48  	MOVD 0(R8)(R10*1), R5	// R5, R6, R7, R1 = x[i..i+3]
    49  	MOVD 8(R8)(R10*1), R6
    50  	MOVD 16(R8)(R10*1), R7
    51  	MOVD 24(R8)(R10*1), R1
    52  	ADDC R4, R4		// restore CF: R4 is 0 or -1 here; (-1)+(-1) carries out, 0+0 does not
    53  	MOVD 0(R9)(R10*1), R11
    54  	ADDE R11, R5		// x[i] + y[i] + carry; the chain relies on the interleaved MOVD loads leaving the carry intact
    55  	MOVD 8(R9)(R10*1), R11
    56  	ADDE R11, R6
    57  	MOVD 16(R9)(R10*1), R11
    58  	ADDE R11, R7
    59  	MOVD 24(R9)(R10*1), R11
    60  	ADDE R11, R1
    61  	MOVD R0, R4
    62  	ADDE R4, R4		// save CF: R4 = 0 + 0 + carry
    63  	NEG  R4, R4		// keep the carry as 0 or -1 so that ADDC R4, R4 can regenerate it
    64  	MOVD R5, 0(R2)(R10*1)	// z[i..i+3] = sums
    65  	MOVD R6, 8(R2)(R10*1)
    66  	MOVD R7, 16(R2)(R10*1)
    67  	MOVD R1, 24(R2)(R10*1)
    68  
    69  
    70  	ADD  $32, R10		// i += 4 (32 bytes)
    71  	SUB  $4,  R3		// n -= 4
    72  	BGE  U1			// if n >= 0 goto U1
    73  
    74  v1:	ADD  $4, R3		// n += 4 (undo the bias: n is now the number of words left)
    75  	BLE E1			// if n <= 0 goto E1
    76  
    77  L1:	// n > 0: one word per iteration
    78  	ADDC R4, R4		// restore CF (same 0 / -1 encoding as above)
    79  	MOVD 0(R8)(R10*1), R5
    80  	MOVD 0(R9)(R10*1), R11
    81  	ADDE R11, R5		// x[i] + y[i] + carry
    82  	MOVD R5, 0(R2)(R10*1)	// z[i] = sum
    83  	MOVD R0, R4
    84  	ADDE R4, R4		// save CF
    85  	NEG  R4, R4		// back to the 0 / -1 encoding
    86  
    87  	ADD  $8, R10		// i++
    88  	SUB  $1, R3		// n--
    89  	BGT L1			// if n > 0 goto L1
    90  
    91  E1:	NEG  R4, R4		// convert 0 / -1 into the returned 0 / 1
    92  	MOVD R4, c+72(FP)	// return c
    93  	RET
    94  
    95  // Register map, written as <x86-style name> = <s390x register> (the left-hand names look like those of an amd64 version this was ported from -- TODO confirm): DI = R3 (n), CX = R4 (c), SI = r10 (byte offset i), r8 = r8 (x), r9=r9 (y), r10 = r2 (z), r11 = r5, r12 = r6, r13 = r7, r14 = r1 (word temporaries) (R0 set to 0) + use R11 as a further temporary
    96  // func subVV(z, x, y []Word) (c Word) -- z[i] = x[i] - y[i] - borrow for 0 <= i < len(z); c is the borrow out of the last word (0 or 1)
    97  // (same structure as addVV with SUBC/SUBE instead of ADDC/ADDE; the label names are the same as in addVV; the borrow is kept in R4 as 0 or -1 and re-created by computing 0 - R4 in a scratch register)
    98  TEXT ·subVV(SB),NOSPLIT,$0
    99  	MOVD z_len+8(FP), R3	// n = len(z)
   100  	MOVD x+24(FP), R8	// R8 = &x[0]
   101  	MOVD y+48(FP), R9	// R9 = &y[0]
   102  	MOVD z+0(FP), R2	// R2 = &z[0]
   103  
   104  	MOVD $0, R4		// c = 0
   105  	MOVD $0, R0		// make sure it's zero
   106  	MOVD $0, R10		// i = 0 (R10 is a byte offset: 8 per word)
   107  
   108  	// replace the BLT below with BR to disable the unrolled loop
   109  	SUB  $4, R3		// n -= 4
   110  	BLT v1			// if n < 0 goto v1
   111  
   112  U1:	// n >= 0: at least 4 words remain
   113  	// regular loop body unrolled 4x
   114  	MOVD 0(R8)(R10*1), R5	// R5, R6, R7, R1 = x[i..i+3]
   115  	MOVD 8(R8)(R10*1), R6
   116  	MOVD 16(R8)(R10*1), R7
   117  	MOVD 24(R8)(R10*1), R1
   118  	MOVD R0, R11		// scratch = 0
   119  	SUBC R4, R11		// restore CF: 0 - R4 borrows exactly when R4 is -1
   120  	MOVD 0(R9)(R10*1), R11
   121  	SUBE R11, R5		// x[i] - y[i] - borrow; the chain relies on the interleaved MOVD loads leaving the borrow intact
   122  	MOVD 8(R9)(R10*1), R11
   123  	SUBE R11, R6
   124  	MOVD 16(R9)(R10*1), R11
   125  	SUBE R11, R7
   126  	MOVD 24(R9)(R10*1), R11
   127  	SUBE R11, R1
   128  	MOVD R0, R4
   129  	SUBE R4, R4		// save CF: R4 = 0 - 0 - borrow, i.e. 0 or -1
   130  	MOVD R5, 0(R2)(R10*1)	// z[i..i+3] = differences
   131  	MOVD R6, 8(R2)(R10*1)
   132  	MOVD R7, 16(R2)(R10*1)
   133  	MOVD R1, 24(R2)(R10*1)
   134  
   135  
   136  	ADD  $32, R10		// i += 4 (32 bytes)
   137  	SUB  $4,  R3		// n -= 4
   138  	BGE  U1			// if n >= 0 goto U1
   139  
   140  v1:	ADD  $4, R3		// n += 4 (undo the bias: n is now the number of words left)
   141  	BLE E1			// if n <= 0 goto E1
   142  
   143  L1:	// n > 0: one word per iteration
   144  	MOVD R0, R11		// scratch = 0
   145  	SUBC R4, R11		// restore CF (same 0 / -1 encoding as above)
   146  	MOVD 0(R8)(R10*1), R5
   147  	MOVD 0(R9)(R10*1), R11
   148  	SUBE R11, R5		// x[i] - y[i] - borrow
   149  	MOVD R5, 0(R2)(R10*1)	// z[i] = difference
   150  	MOVD R0, R4
   151  	SUBE R4, R4		// save CF as 0 or -1
   152  
   153  	ADD  $8, R10		// i++
   154  	SUB  $1, R3		// n--
   155  	BGT L1			// if n > 0 goto L1
   156  
   157  E1:	NEG  R4, R4		// convert 0 / -1 into the returned 0 / 1
   158  	MOVD R4, c+72(FP)	// return c
   159  	RET
   160  
   161  
   162  // func addVW(z, x []Word, y Word) (c Word) -- z = x + y over len(z) words, with y added to word 0 and the carry rippled upward; c is the final carry
   163  TEXT ·addVW(SB),NOSPLIT,$0
   164  // Register map, <x86-style name> = <s390x register>: DI = R3 (n), CX = R4 (c), SI = r10 (byte offset i), r8 = r8 (x), r10 = r2 (z), r11 = r5, r12 = r6, r13 = r7, r14 = r1 (word temporaries) (R0 set to 0)
   165  	MOVD z_len+8(FP), R3	// n = len(z)
   166  	MOVD x+24(FP), R8	// R8 = &x[0]
   167  	MOVD y+48(FP), R4	// c = y
   168  	MOVD z+0(FP), R2	// R2 = &z[0]
   169  	MOVD $0, R0		// make sure it's 0
   170  	MOVD $0, R10		// i = 0 (R10 is a byte offset: 8 per word)
   171  
   172  	// replace the BLT below with BR to disable the unrolled loop
   173  	SUB $4, R3		// n -= 4
   174  	BLT v4			// if n < 4 goto v4
   175  
   176  U4:	// n >= 0: at least 4 words remain
   177  	// regular loop body unrolled 4x
   178  	MOVD 0(R8)(R10*1), R5	// R5, R6, R7, R1 = x[i..i+3]
   179  	MOVD 8(R8)(R10*1), R6
   180  	MOVD 16(R8)(R10*1), R7
   181  	MOVD 24(R8)(R10*1), R1
   182  	ADDC R4, R5		// x[i] + c, starting a fresh carry chain
   183  	ADDE R0, R6		// ripple the carry through the next three words
   184  	ADDE R0, R7
   185  	ADDE R0, R1
   186  	ADDE R0, R0		// R0 = 0 + 0 + carry (R0 is temporarily non-zero)
   187  	MOVD R0, R4		// save CF
   188  	SUB  R0, R0		// put R0 back to 0
   189  	MOVD R5, 0(R2)(R10*1)	// z[i..i+3] = sums
   190  	MOVD R6, 8(R2)(R10*1)
   191  	MOVD R7, 16(R2)(R10*1)
   192  	MOVD R1, 24(R2)(R10*1)
   193  
   194  	ADD $32, R10		// i += 4 -> i +=32
   195  	SUB $4, R3		// n -= 4
   196  	BGE U4			// if n >= 0 goto U4
   197  
   198  v4:	ADD $4, R3		// n += 4 (undo the bias: n is now the number of words left)
   199  	BLE E4			// if n <= 0 goto E4
   200  
   201  L4:	// n > 0: one word per iteration
   202  	MOVD 0(R8)(R10*1), R5
   203  	ADDC R4, R5		// x[i] + c
   204  	ADDE R0, R0		// R0 = carry (temporarily non-zero)
   205  	MOVD R0, R4		// save CF
   206  	SUB  R0, R0		// put R0 back to 0
   207  	MOVD R5, 0(R2)(R10*1)	// z[i] = sum
   208  
   209  	ADD  $8, R10		// i++
   210  	SUB  $1, R3		// n--
   211  	BGT L4			// if n > 0 goto L4
   212  
   213  E4:	MOVD R4, c+56(FP)	// return c
   214  
   215  	RET
   216  
   217  // Register map, <x86-style name> = <s390x register>: DI = R3 (n), CX = R4 (c), SI = r10 (byte offset i), r8 = r8 (x), r10 = r2 (z), r11 = r5, r12 = r6, r13 = r7, r14 = r1 (word temporaries) (R0 set to 0)
   218  // func subVW(z, x []Word, y Word) (c Word) -- z = x - y over len(z) words, with y subtracted from word 0 and the borrow rippled upward; c is the final borrow
   219  // (same structure as addVW with SUBC/SUBE instead of ADDC/ADDE; the label names are the same as in addVW; the borrow is captured with SUBE R4, R4 followed by NEG)
   220  TEXT ·subVW(SB),NOSPLIT,$0
   221  	MOVD z_len+8(FP), R3	// n = len(z)
   222  	MOVD x+24(FP), R8	// R8 = &x[0]
   223  	MOVD y+48(FP), R4	// c = y
   224  	MOVD z+0(FP), R2	// R2 = &z[0]
   225  	MOVD $0, R0		// make sure it's 0
   226  	MOVD $0, R10		// i = 0 (R10 is a byte offset: 8 per word)
   227  
   228  	// replace the BLT below with BR to disable the unrolled loop
   229  	SUB $4, R3		// n -= 4
   230  	BLT v4			// if n < 4 goto v4
   231  
   232  U4:	// n >= 0: at least 4 words remain
   233  	// regular loop body unrolled 4x
   234  	MOVD 0(R8)(R10*1), R5	// R5, R6, R7, R1 = x[i..i+3]
   235  	MOVD 8(R8)(R10*1), R6
   236  	MOVD 16(R8)(R10*1), R7
   237  	MOVD 24(R8)(R10*1), R1
   238  	SUBC R4, R5 //SLGR  -> SUBC: x[i] - c, starting a fresh borrow chain
   239  	SUBE R0, R6 //SLBGR -> SUBE: ripple the borrow through the next three words
   240  	SUBE R0, R7
   241  	SUBE R0, R1
   242  	SUBE R4, R4		// save CF: R4 - R4 - borrow, i.e. 0 or -1
   243  	NEG  R4, R4		// c = 0 or 1
   244  	MOVD R5, 0(R2)(R10*1)	// z[i..i+3] = differences
   245  	MOVD R6, 8(R2)(R10*1)
   246  	MOVD R7, 16(R2)(R10*1)
   247  	MOVD R1, 24(R2)(R10*1)
   248  
   249  	ADD $32, R10		// i += 4 -> i +=32
   250  	SUB $4, R3		// n -= 4
   251  	BGE U4			// if n >= 0 goto U4
   252  
   253  v4:	ADD $4, R3		// n += 4 (undo the bias: n is now the number of words left)
   254  	BLE E4			// if n <= 0 goto E4
   255  
   256  L4:	// n > 0: one word per iteration
   257  	MOVD 0(R8)(R10*1), R5
   258  	SUBC R4, R5		// x[i] - c
   259  	SUBE R4, R4		// save CF as 0 or -1
   260  	NEG  R4, R4		// c = 0 or 1
   261  	MOVD R5, 0(R2)(R10*1)	// z[i] = difference
   262  
   263  	ADD  $8, R10		// i++
   264  	SUB  $1, R3		// n--
   265  	BGT L4			// if n > 0 goto L4
   266  
   267  E4:	MOVD R4, c+56(FP)	// return c
   268  
   269  	RET
   270  
   271  // func shlVU(z, x []Word, s uint) (c Word) -- z = x << s over len(z) words; c holds the bits shifted out of the top word. Every path walks from the top word down, so z may overlap x as long as &z[i] >= &x[i].
   272  TEXT ·shlVU(SB),NOSPLIT,$0
   273  	MOVD $0, R0             // make sure it's zero: R0 is compared against and stored below
   274  	MOVD z_len+8(FP), R5
   275  	SUB  $1, R5             // n--
   276  	BLT  X8b                // n < 0        (len(z) == 0)
   277  	// n >= 0 from here on, i.e. len(z) > 0 and n = len(z)-1
   278  	MOVD s+48(FP), R4
   279  	CMPBEQ	R0, R4, Z80	       // s == 0: plain copy, c = 0
   280  	MOVD $64, R6
   281  	CMPBEQ  R6, R4, Z864	       // s == 64: whole-word move
   282  	MOVD z+0(FP), R2
   283  	MOVD x+24(FP), R8
   284  	SLD  $3, R5             // R5 = n*8, byte offset of the top word
   285  	SUB  R4, R6, R7         // R7 = 64 - s
   286  	MOVD (R8)(R5*1), R10    // w1 = x[n]  (top word)
   287  	SRD  R7, R10, R3
   288  	MOVD R3, c+56(FP)       // c = w1 >> (64-s)
   289  
   290  	// R5 doubles as the byte offset of word i; i runs from n down to 1
   291  	BR   E8
   292  
   293  	// i > 0
   294  L8:	MOVD R10, R3             // w = w1
   295  	MOVD -8(R8)(R5*1), R10   // w1 = x[i-1]
   296  
   297  	SLD  R4,  R3             // w<<s
   298  	SRD  R7, R10, R6         // w1>>(64-s)
   299  	OR   R6, R3
   300  	MOVD R3, (R2)(R5*1)      // z[i] = w<<s | w1>>(64-s)
   301  	SUB  $8, R5              // i--
   302  
   303  E8:	CMPBGT R5, R0, L8        // i > 0
   304  
   305  	// i == 0
   306  X8a:	SLD  R4, R10             // w1<<s
   307  	MOVD R10, (R2)           // z[0] = w1<<s
   308  	RET
   309  
   310  X8b:	MOVD R0, c+56(FP)        // empty z: c = 0
   311  	RET
   312  
   313  Z80:	MOVD z+0(FP), R2
   314  	MOVD x+24(FP), R8
   315  	SLD  $3, R5             // R5 = n*8, byte offset of the top word
   316  
   317  	MOVD (R8)(R5*1), R10    // w1 = x[n]  (top word)
   318  	MOVD $0, R3
   319  	MOVD R3, c+56(FP)       // c = 0: nothing is shifted out
   320  
   321  	// Copy from the top word down, like the s != 0 path. A low-to-high
   322  	// copy would overwrite words of x that have not been read yet when
   323  	// z overlaps x two or more words above it.
   324  	BR   E8Z
   325  
   326  L8Z:	MOVD R10, R3            // w = w1
   327  	MOVD -8(R8)(R5*1), R10  // w1 = x[i-1]
   328  
   329  	MOVD R3, (R2)(R5*1)     // z[i] = w
   330  	SUB  $8, R5             // i--
   331  
   332  E8Z:	CMPBGT R5, R0, L8Z      // i > 0
   333  
   334  	MOVD R10, (R2)          // z[0] = w1
   335  	RET
   336  
   337  Z864:	MOVD z+0(FP), R2
   338  	MOVD x+24(FP), R8
   339  	SLD  $3, R5             // R5 = n*8, byte offset of the top word
   340  	MOVD (R8)(R5*1), R3     // top word x[n]
   341  	MOVD R3, c+56(FP)       // c = x[n]: the whole top word is shifted out
   342  
   343  	BR   E864
   344  
   345  	// i > 0
   346  L864:	MOVD -8(R8)(R5*1), R3
   347  
   348  	MOVD R3, (R2)(R5*1)     // z[i] = x[i-1]
   349  	SUB  $8, R5             // i--
   350  
   351  E864:	CMPBGT R5, R0, L864     // i > 0
   352  
   353  	MOVD R0, (R2)           // z[0] = 0
   354  	RET
   355  
   356  
   357  // Register map, <x86-style name> = <s390x register>: CX = R4 (s), r8 = r8 (x), r10 = r2 (z), r11 = r5 (byte offset of the last word), DX = r3 (w), AX = r10 (w1), BX = R1 (byte offset i), 64-count = r7 (R0 set to 0) temp = R6
   358  // func shrVU(z, x []Word, s uint) (c Word) -- z = x >> s over len(z) words; c holds the bits shifted out of word 0, left-aligned. Every path walks from word 0 upward.
   359  TEXT ·shrVU(SB),NOSPLIT,$0
   360  	MOVD $0, R0             // make sure it's zero: R0 is compared against and stored below
   361  	MOVD z_len+8(FP), R5
   362  	SUB  $1, R5             // n--
   363  	BLT  X9b                // n < 0        (len(z) == 0)
   364  	// n >= 0 from here on, i.e. len(z) > 0 and n = len(z)-1
   365  	MOVD s+48(FP), R4
   366  	CMPBEQ	R0, R4, ZB0	       // s == 0: plain copy, c = 0
   367  	MOVD $64, R6
   368  	CMPBEQ  R6, R4, ZB64	       // s == 64: whole-word move
   369  	MOVD z+0(FP), R2
   370  	MOVD x+24(FP), R8
   371  	SLD  $3, R5             // R5 = n*8, byte offset of the last word
   372  	SUB  R4, R6, R7         // R7 = 64 - s
   373  	MOVD (R8), R10          // w1 = x[0]
   374  	SLD  R7, R10, R3
   375  	MOVD R3, c+56(FP)       // c = w1 << (64-s)
   376  
   377  	MOVD $0, R1            // i = 0 (byte offset)
   378  	BR   E9
   379  
   380  	// i < n
   381  L9:	MOVD R10, R3            // w = w1
   382  	MOVD 8(R8)(R1*1), R10   // w1 = x[i+1]
   383  
   384  	SRD  R4,  R3            // w>>s
   385  	SLD  R7, R10, R6        // w1<<(64-s)
   386  	OR   R6, R3
   387  	MOVD R3, (R2)(R1*1)     // z[i] = w>>s | w1<<(64-s)
   388  	ADD  $8, R1             // i++
   389  
   390  E9:	CMPBLT R1, R5, L9       // i < n
   391  
   392  	// i == n
   393  X9a:	SRD  R4, R10            // w1>>s
   394  	MOVD R10, (R2)(R5*1)    // z[n] = w1>>s
   395  	RET
   396  
   397  X9b:	MOVD R0, c+56(FP)       // empty z: c = 0
   398  	RET
   399  
   400  ZB0:	MOVD z+0(FP), R2
   401  	MOVD x+24(FP), R8
   402  	SLD  $3, R5             // R5 = n*8, byte offset of the last word
   403  
   404  	MOVD (R8), R10          // w1 = x[0]
   405  	MOVD $0, R3
   406  	MOVD R3, c+56(FP)       // c = 0: nothing is shifted out
   407  
   408  	MOVD $0, R1             // i = 0 (byte offset)
   409  	BR   E9Z
   410  
   411  	// i < n
   412  L9Z:	MOVD R10, R3            // w = w1
   413  	MOVD 8(R8)(R1*1), R10   // w1 = x[i+1]
   414  
   415  	MOVD R3, (R2)(R1*1)     // z[i] = w
   416  	ADD  $8, R1             // i++
   417  
   418  E9Z:	CMPBLT R1, R5, L9Z      // i < n
   419  
   420  	// i == n
   421  	MOVD R10, (R2)(R5*1)    // z[n] = w1
   422  	RET
   423  
   424  ZB64:	MOVD z+0(FP), R2
   425  	MOVD x+24(FP), R8
   426  	SLD  $3, R5             // R5 = n*8, byte offset of the last word
   427  	MOVD (R8), R3          // x[0]
   428  	MOVD R3, c+56(FP)      // c = x[0]: the whole bottom word is shifted out
   429  
   430  	MOVD $0, R1            // i = 0 (byte offset)
   431  	BR   E964
   432  
   433  	// i < n
   434  L964:	MOVD 8(R8)(R1*1), R3   // x[i+1]
   435  
   436  	MOVD R3, (R2)(R1*1)     // z[i] = x[i+1]
   437  	ADD  $8, R1             // i++
   438  
   439  E964:	CMPBLT R1, R5, L964     // i < n
   440  
   441  	// i == n
   442  	MOVD  $0, R10
   443  	MOVD R10, (R2)(R5*1)    // z[n] = 0
   444  	RET
   445  
   446  // Register map, <x86-style name> = <s390x register>: CX = R4 (c), r8 = r8 (x), r9=r9 (y), r10 = r2 (z), r11 = r5 (n), DX = r3, AX = r6 (product high word), BX = R1 (byte offset i*8), (R0 set to 0) + use R11 (product low word) + use R7 for i
   447  // func mulAddVWW(z, x []Word, y, r Word) (c Word) -- z[i] = low word of x[i]*y + c, with c starting at r and then carrying the high word; returns the final c
   448  TEXT ·mulAddVWW(SB),NOSPLIT,$0
   449  	MOVD z+0(FP), R2	// R2 = &z[0]
   450  	MOVD x+24(FP), R8	// R8 = &x[0]
   451  	MOVD y+48(FP), R9	// R9 = y
   452  	MOVD r+56(FP), R4	// c = r
   453  	MOVD z_len+8(FP), R5	// n = len(z)
   454  	MOVD $0, R1		// i*8 = 0 (byte offset)
   455  	MOVD $0, R7		// i = 0
   456  	MOVD $0, R0		// make sure it's zero
   457  	BR E5
   458  
   459  L5:	MOVD (R8)(R1*1), R6	// R6 = x[i]
   460  	MULHDU R9, R6	// R6 = high word of x[i]*y; the code below takes the low word from R11, relying on the assembler's MULHDU expansion leaving it there (NOTE(review): confirm)
   461  	ADDC R4, R11 		//add to low order bits: low += c
   462  	ADDE R0, R6	// carry into the high word
   463  	MOVD R11, (R2)(R1*1)	// z[i] = low
   464  	MOVD R6, R4	// c = high
   465  	ADD  $8, R1		// i*8 += 8
   466  	ADD  $1, R7		// i++
   467  
   468  E5:	CMPBLT R7, R5, L5	// i < n
   469  
   470  	MOVD R4, c+64(FP)	// return c
   471  	RET
   472  
   473  // func addMulVVW(z, x []Word, y Word) (c Word) -- z[i] = low word of z[i] + x[i]*y + c, with c starting at 0 and then carrying the high word; returns the final c
   474  // Register map, <x86-style name> = <s390x register>: CX = R4 (c), r8 = r8 (x), r9=r9 (y), r10 = r2 (z), r11 = r5 (n), AX = r11 (product low word), DX = R6 (product high word), r12=r12 (n rounded down to even), BX = R1 (byte offset i*8), (R0 set to 0) + use R10 for z[i] + use R7 for i
   475  TEXT ·addMulVVW(SB),NOSPLIT,$0
   476  	MOVD z+0(FP), R2	// R2 = &z[0]
   477  	MOVD x+24(FP), R8	// R8 = &x[0]
   478  	MOVD y+48(FP), R9	// R9 = y
   479  	MOVD z_len+8(FP), R5	// n = len(z)
   480  
   481  	MOVD $0, R1		// i*8 = 0
   482  	MOVD $0, R7		// i = 0
   483  	MOVD $0, R0		// make sure it's zero
   484  	MOVD $0, R4		// c = 0
   485  
   486  	MOVD R5, R12
   487  	AND  $-2, R12	// R12 = n with the low bit cleared: bound for the 2x unrolled loop
   488  	CMPBGE R5, $2, A6	// n >= 2: run the unrolled loop first
   489  	BR   E6
   490  
   491  A6:	MOVD (R8)(R1*1), R6	// R6 = x[i]
   492  	MULHDU R9, R6	// R6 = high word of x[i]*y; the low word is taken from R11, relying on the assembler's MULHDU expansion leaving it there (NOTE(review): confirm)
   493  	MOVD (R2)(R1*1), R10	// R10 = z[i]
   494  	ADDC R10, R11	//add to low order bits: low += z[i]
   495  	ADDE R0, R6	// carry into the high word
   496  	ADDC R4, R11	// low += c
   497  	ADDE R0, R6	// carry into the high word
   498  	MOVD R6, R4	// c = high
   499  	MOVD R11, (R2)(R1*1)	// z[i] = low
   500  
   501  	MOVD (8)(R8)(R1*1), R6	// same step again for word i+1
   502  	MULHDU R9, R6
   503  	MOVD (8)(R2)(R1*1), R10
   504  	ADDC R10, R11	//add to low order bits
   505  	ADDE R0, R6
   506  	ADDC R4, R11
   507  	ADDE R0, R6
   508  	MOVD R6, R4
   509  	MOVD R11, (8)(R2)(R1*1)
   510  
   511  	ADD  $16, R1		// i*8 += 16
   512  	ADD  $2, R7		// i += 2
   513  
   514  	CMPBLT R7, R12, A6	// i < (n &^ 1)
   515  	BR E6
   516  
   517  L6:	MOVD (R8)(R1*1), R6	// one word per iteration; same step as in A6
   518  	MULHDU R9, R6
   519  	MOVD (R2)(R1*1), R10
   520  	ADDC R10, R11	//add to low order bits
   521  	ADDE R0, R6
   522  	ADDC R4, R11
   523  	ADDE R0, R6
   524  	MOVD R6, R4
   525  	MOVD R11, (R2)(R1*1)
   526  
   527  	ADD  $8, R1		// i*8 += 8
   528  	ADD  $1, R7		// i++
   529  
   530  E6:	CMPBLT R7, R5, L6	// i < n
   531  
   532  	MOVD R4, c+56(FP)	// return c
   533  	RET
   534  
   535  // func divWVW(z []Word, xn Word, x []Word, y Word) (r Word) -- walks from the top word down: divides r:x[i] by y, stores the quotient in z[i] and keeps the remainder in r (r starts as xn); returns the final r
   536  // Registers actually used below: R2 = z, R8 = x, R9 = y, R10 = r (high word of the dividend, then remainder), R11 = x[i] (low word of the dividend, then quotient), R7 = i, R1 = i*8; R0 is set to 0 but not otherwise referenced in this routine
   537  TEXT ·divWVW(SB),NOSPLIT,$0
   538  	MOVD z+0(FP), R2	// R2 = &z[0]
   539  	MOVD xn+24(FP), R10	// r = xn
   540  	MOVD x+32(FP), R8	// R8 = &x[0]
   541  	MOVD y+56(FP), R9	// R9 = y
   542  	MOVD z_len+8(FP), R7	// i = len(z)
   543  	SLD  $3, R7, R1		// i*8
   544  	MOVD $0, R0		// make sure it's zero
   545  	BR E7
   546  
   547  L7:	MOVD (R8)(R1*1), R11	// low word of the dividend = x[i]
   548  	WORD $0xB98700A9  //DLGR R10,R9: hand-encoded divide of the R10:R11 pair by R9; the registers are fixed by the encoding
   549  	MOVD R11, (R2)(R1*1)	// z[i] = quotient (taken from R11); the remainder stays in R10 for the next word
   550  
   551  E7:	SUB  $1, R7		// i--
   552  	SUB  $8, R1	// i*8 -= 8; this result is what the branch below tests
   553  	BGE L7			// i >= 0
   554  
   555  	MOVD R10, r+64(FP)	// return r
   556  	RET
   557  
   558  // func bitLen(x Word) (n int) -- n = 64 minus the value the FLOGR instruction leaves in R2 for x
   559  TEXT ·bitLen(SB),NOSPLIT,$0
   560  	MOVD x+0(FP), R2	// the operand must sit in R2: the instruction below is hand-encoded
   561  	WORD $0xb9830022	// FLOGR R2,R2 (find leftmost one)
   562  	MOVD $64, R1	// word size in bits
   563  	SUB  R2, R1	// R1 = 64 - R2
   564  	MOVD R1, n+8(FP)	// return n
   565  	RET