github.com/riscv/riscv-go@v0.0.0-20200123204226-124ebd6fcc8e/src/crypto/sha512/sha512block_ppc64le.s (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "textflag.h"
     6  
     7  // SHA512 block routine. See sha512block.go for Go equivalent.
     8  //
     9  // The algorithm is detailed in FIPS 180-4:
    10  //
    11  //  http://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
    12  //
    13  // Wt = Mt; for 0 <= t <= 15
    14  // Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 79
    15  //
    16  // a = H0
    17  // b = H1
    18  // c = H2
    19  // d = H3
    20  // e = H4
    21  // f = H5
    22  // g = H6
    23  // h = H7
    24  //
    25  // for t = 0 to 79 {
    26  //    T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
    27  //    T2 = BIGSIGMA0(a) + Maj(a,b,c)
    28  //    h = g
    29  //    g = f
    30  //    f = e
    31  //    e = d + T1
    32  //    d = c
    33  //    c = b
    34  //    b = a
    35  //    a = T1 + T2
    36  // }
    37  //
    38  // H0 = a + H0
    39  // H1 = b + H1
    40  // H2 = c + H2
    41  // H3 = d + H3
    42  // H4 = e + H4
    43  // H5 = f + H5
    44  // H6 = g + H6
    45  // H7 = h + H7
    46  
    47  // Wt = Mt; for 0 <= t <= 15
        //
        // MSGSCHEDULE0(index) loads the doubleword at (index*8)(R6), reverses
        // its byte order, stores the result at (index*8)(R9) and leaves the
        // same value in R14.
        // The reversal is done as two 32-bit byte swaps. In each
        // RLWNM/RLWMI/RLWMI triple, RLWNM $24 (rotate the word left by 24
        // bits) supplies two of the four swapped bytes and the two RLWMI $8
        // instructions insert the other two, taken from the word rotated left
        // by 8, under the masks 0x00FF0000 and 0x000000FF. The swapped low
        // word (R21) is shifted into the upper half and ORed with the swapped
        // high word (R14).
        // NOTE(review): the reversal is presumably needed because SHA-512
        // message words are big-endian while this file targets ppc64le.
        // Uses R14, R20 and R21.
    48  #define MSGSCHEDULE0(index) \
    49  	MOVD	(index*8)(R6), R14; \
    50  	RLWNM	$24, R14, $-1, R21; \
    51  	RLWMI	$8, R14, $0x00FF0000, R21; \
    52  	RLWMI	$8, R14, $0x000000FF, R21; \
    53  	SLD	$32, R21; \
    54  	SRD	$32, R14, R20; \
    55  	RLWNM	$24, R20, $-1, R14; \
    56  	RLWMI	$8, R20, $0x00FF0000, R14; \
    57  	RLWMI	$8, R20, $0x000000FF, R14; \
    58  	OR	R21, R14; \
    59  	MOVD	R14, (index*8)(R9)
    60  
    61  // Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 79
    62  //   SIGMA0(x) = ROTR(1,x) XOR ROTR(8,x) XOR SHR(7,x)
    63  //   SIGMA1(x) = ROTR(19,x) XOR ROTR(61,x) XOR SHR(6,x)
        //
        // MSGSCHEDULE1(index) reads the schedule entries index-2, index-7,
        // index-15 and index-16 from the array at R9, stores the new word at
        // (index*8)(R9) and leaves it in R14.
        // ROTR(n,x) is written as RLDCL $64-n, i.e. a rotate left by 64-n.
        // SIGMA1 is accumulated in R14 and SIGMA0 in R15; R16 and R17 hold the
        // intermediate rotate/shift terms and R21 the plain schedule words.
        // Uses R14, R15, R16, R17 and R21.
    64  #define MSGSCHEDULE1(index) \
    65  	MOVD	((index-2)*8)(R9), R14; \
    66  	MOVD	R14, R16; \
    67  	RLDCL	$64-19, R14, $-1, R14; \
    68  	MOVD	R16, R17; \
    69  	RLDCL	$64-61, R16, $-1, R16; \
    70  	SRD	$6, R17; \
    71  	MOVD	((index-15)*8)(R9), R15; \
    72  	XOR	R16, R14; \
    73  	MOVD	R15, R16; \
    74  	XOR	R17, R14; \
    75  	RLDCL	$64-1, R15, $-1, R15; \
    76  	MOVD	R16, R17; \
    77  	SRD	$7, R17; \
    78  	RLDCL	$64-8, R16, $-1, R16; \
    79  	MOVD	((index-7)*8)(R9), R21; \
    80  	ADD	R21, R14; \
    81  	XOR	R16, R15; \
    82  	XOR	R17, R15; \
    83  	MOVD	((index-16)*8)(R9), R21; \
    84  	ADD	R21, R15; \
    85  	ADD	R15, R14; \
    86  	MOVD	R14, ((index)*8)(R9)
    87  
    88  // Calculate T1 in R14 - uses R14, R16 and R17 registers.
    89  // h is also used as an accumulator. Wt is passed in R14.
    90  //   T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + Kt + Wt
    91  //     BIGSIGMA1(x) = ROTR(14,x) XOR ROTR(18,x) XOR ROTR(41,x)
    92  //     Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
        //
        // const is the round constant Kt; it is loaded into R17 with MOVD.
        // e, f and g are only read. ROTR(n,x) is written as RLDCL $64-n.
        // On exit R14 = T1, while the h register holds only the partial sum
        // h + Wt + Kt + BIGSIGMA1(e): Ch(e, f, g) is built in R14 and the
        // partial sum is added to it there, not back into h.
    93  #define SHA512T1(const, e, f, g, h) \
    94  	MOVD	$const, R17; \
    95  	ADD	R14, h; \
    96  	MOVD	e, R14; \
    97  	ADD	R17, h; \
    98  	MOVD	e, R16; \
    99  	RLDCL	$64-14, R14, $-1, R14; \
   100  	MOVD	e, R17; \
   101  	RLDCL	$64-18, R16, $-1, R16; \
   102  	XOR	R16, R14; \
   103  	MOVD	e, R16; \
   104  	RLDCL	$64-41, R17, $-1, R17; \
   105  	AND	f, R16; \
   106  	XOR	R14, R17; \
   107  	MOVD	e, R14; \
   108  	NOR	R14, R14, R14; \
   109  	ADD	R17, h; \
   110  	AND	g, R14; \
   111  	XOR	R16, R14; \
   112  	ADD	h, R14
   113  
   114  // Calculate T2 in R15 - uses R15, R16, R17 and R8 registers.
   115  //   T2 = BIGSIGMA0(a) + Maj(a, b, c)
   116  //     BIGSIGMA0(x) = ROTR(28,x) XOR ROTR(34,x) XOR ROTR(39,x)
   117  //     Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
        //
        // a, b and c are only read. BIGSIGMA0(a) is accumulated in R8 and
        // Maj(a, b, c) in R15, with R17 and R16 as their respective
        // temporaries; the two computations are interleaved. The final ADD
        // folds R8 into R15, so on exit R15 = T2.
   118  #define SHA512T2(a, b, c) \
   119  	MOVD	a, R8; \
   120  	MOVD	c, R15; \
   121  	RLDCL	$64-28, R8, $-1, R8; \
   122  	MOVD	a, R17; \
   123  	AND	b, R15; \
   124  	RLDCL	$64-34, R17, $-1, R17; \
   125  	MOVD	a, R16; \
   126  	AND	c, R16; \
   127  	XOR	R17, R8; \
   128  	XOR	R16, R15; \
   129  	MOVD	a, R17; \
   130  	MOVD	b, R16; \
   131  	RLDCL	$64-39, R17, $-1, R17; \
   132  	AND	a, R16; \
   133  	XOR	R16, R15; \
   134  	XOR	R17, R8; \
   135  	ADD	R8, R15
   136  
   137  // Calculate T1 and T2, then e = d + T1 and a = T1 + T2.
   138  // The values for e and a are stored in d and h, ready for rotation.
        //
        // Wt must already be in R14 on entry (it is consumed by SHA512T1).
        // The index argument is not referenced by this macro's body.
        // No values are moved between the a..h registers here: the new e is
        // accumulated into the register passed as d and the new a overwrites
        // the register passed as h, so a caller renames the working variables
        // by passing the registers shifted by one position on the next round.
        // Clobbers R8, R14, R15, R16 and R17 via SHA512T1 and SHA512T2.
   139  #define SHA512ROUND(index, const, a, b, c, d, e, f, g, h) \
   140  	SHA512T1(const, e, f, g, h); \
   141  	SHA512T2(a, b, c); \
   142  	MOVD	R15, h; \
   143  	ADD	R14, d; \
   144  	ADD	R14, h
   145  
        // SHA512ROUND0 is one round for the first 16 message words:
        // MSGSCHEDULE0 fetches and byte-reverses message word `index`, leaving
        // Wt in R14, and SHA512ROUND then performs the round with constant
        // `const` on the working variables a..h.
   146  #define SHA512ROUND0(index, const, a, b, c, d, e, f, g, h) \
   147  	MSGSCHEDULE0(index); \
   148  	SHA512ROUND(index, const, a, b, c, d, e, f, g, h)
   149  
        // SHA512ROUND1 is one round for the remaining message words:
        // MSGSCHEDULE1 derives schedule word `index` from earlier schedule
        // entries, leaving Wt in R14, and SHA512ROUND then performs the round
        // with constant `const` on the working variables a..h.
   150  #define SHA512ROUND1(index, const, a, b, c, d, e, f, g, h) \
   151  	MSGSCHEDULE1(index); \
   152  	SHA512ROUND(index, const, a, b, c, d, e, f, g, h)
   153  
   154  // func block(dig *digest, p []byte)
        //
        // Processes every complete 128-byte block of p (a trailing partial
        // block is ignored) and updates the eight 64-bit hash words at the
        // start of *dig in place.
        //
        // Stack use: 0..639(R1) holds the 80-entry message schedule and
        // 640(R1) holds the end-of-input pointer.
        // Registers: R6 = current block, R9 = dig or message schedule,
        // R22-R29 = working variables a..h, R7, R8, R14-R17, R20 and
        // R21 = scratch.
   155  TEXT ·block(SB),0,$680-32
   156  	MOVD	p_base+8(FP), R6		// R6 = start of p
   157  	MOVD	p_len+16(FP), R7		// R7 = len(p)
   158  	SRD	$7, R7
   159  	SLD	$7, R7				// R7 = len(p) rounded down to a multiple of 128
   160  
   161  	ADD	R6, R7, R8			// R8 = end of the last complete block
   162  	MOVD	R8, 640(R1)			// keep it on the stack; R8 is scratch in SHA512T2
   163  	CMP	R6, R8
   164  	BEQ	end				// no complete block: nothing to do
   165  
        	// Load the current hash value into the working variables.
   166  	MOVD	dig+0(FP), R9			// R9 = dig
   167  	MOVD	(0*8)(R9), R22		// a = H0
   168  	MOVD	(1*8)(R9), R23		// b = H1
   169  	MOVD	(2*8)(R9), R24		// c = H2
   170  	MOVD	(3*8)(R9), R25		// d = H3
   171  	MOVD	(4*8)(R9), R26		// e = H4
   172  	MOVD	(5*8)(R9), R27		// f = H5
   173  	MOVD	(6*8)(R9), R28		// g = H6
   174  	MOVD	(7*8)(R9), R29		// h = H7
   175  
   176  loop:					// one iteration per 128-byte block
   177  	MOVD	R1, R9			// R9: message schedule
   178  
        	// Rounds 0-15: Wt is message word t, read from the block at R6.
        	// The register arguments shift by one position every round, which
        	// renames the working variables (h = g, g = f, ...) without any
        	// register-to-register moves.
   179  	SHA512ROUND0(0, 0x428a2f98d728ae22, R22, R23, R24, R25, R26, R27, R28, R29)
   180  	SHA512ROUND0(1, 0x7137449123ef65cd, R29, R22, R23, R24, R25, R26, R27, R28)
   181  	SHA512ROUND0(2, 0xb5c0fbcfec4d3b2f, R28, R29, R22, R23, R24, R25, R26, R27)
   182  	SHA512ROUND0(3, 0xe9b5dba58189dbbc, R27, R28, R29, R22, R23, R24, R25, R26)
   183  	SHA512ROUND0(4, 0x3956c25bf348b538, R26, R27, R28, R29, R22, R23, R24, R25)
   184  	SHA512ROUND0(5, 0x59f111f1b605d019, R25, R26, R27, R28, R29, R22, R23, R24)
   185  	SHA512ROUND0(6, 0x923f82a4af194f9b, R24, R25, R26, R27, R28, R29, R22, R23)
   186  	SHA512ROUND0(7, 0xab1c5ed5da6d8118, R23, R24, R25, R26, R27, R28, R29, R22)
   187  	SHA512ROUND0(8, 0xd807aa98a3030242, R22, R23, R24, R25, R26, R27, R28, R29)
   188  	SHA512ROUND0(9, 0x12835b0145706fbe, R29, R22, R23, R24, R25, R26, R27, R28)
   189  	SHA512ROUND0(10, 0x243185be4ee4b28c, R28, R29, R22, R23, R24, R25, R26, R27)
   190  	SHA512ROUND0(11, 0x550c7dc3d5ffb4e2, R27, R28, R29, R22, R23, R24, R25, R26)
   191  	SHA512ROUND0(12, 0x72be5d74f27b896f, R26, R27, R28, R29, R22, R23, R24, R25)
   192  	SHA512ROUND0(13, 0x80deb1fe3b1696b1, R25, R26, R27, R28, R29, R22, R23, R24)
   193  	SHA512ROUND0(14, 0x9bdc06a725c71235, R24, R25, R26, R27, R28, R29, R22, R23)
   194  	SHA512ROUND0(15, 0xc19bf174cf692694, R23, R24, R25, R26, R27, R28, R29, R22)
   195  
        	// Rounds 16-79: Wt is computed from earlier schedule entries.
   196  	SHA512ROUND1(16, 0xe49b69c19ef14ad2, R22, R23, R24, R25, R26, R27, R28, R29)
   197  	SHA512ROUND1(17, 0xefbe4786384f25e3, R29, R22, R23, R24, R25, R26, R27, R28)
   198  	SHA512ROUND1(18, 0x0fc19dc68b8cd5b5, R28, R29, R22, R23, R24, R25, R26, R27)
   199  	SHA512ROUND1(19, 0x240ca1cc77ac9c65, R27, R28, R29, R22, R23, R24, R25, R26)
   200  	SHA512ROUND1(20, 0x2de92c6f592b0275, R26, R27, R28, R29, R22, R23, R24, R25)
   201  	SHA512ROUND1(21, 0x4a7484aa6ea6e483, R25, R26, R27, R28, R29, R22, R23, R24)
   202  	SHA512ROUND1(22, 0x5cb0a9dcbd41fbd4, R24, R25, R26, R27, R28, R29, R22, R23)
   203  	SHA512ROUND1(23, 0x76f988da831153b5, R23, R24, R25, R26, R27, R28, R29, R22)
   204  	SHA512ROUND1(24, 0x983e5152ee66dfab, R22, R23, R24, R25, R26, R27, R28, R29)
   205  	SHA512ROUND1(25, 0xa831c66d2db43210, R29, R22, R23, R24, R25, R26, R27, R28)
   206  	SHA512ROUND1(26, 0xb00327c898fb213f, R28, R29, R22, R23, R24, R25, R26, R27)
   207  	SHA512ROUND1(27, 0xbf597fc7beef0ee4, R27, R28, R29, R22, R23, R24, R25, R26)
   208  	SHA512ROUND1(28, 0xc6e00bf33da88fc2, R26, R27, R28, R29, R22, R23, R24, R25)
   209  	SHA512ROUND1(29, 0xd5a79147930aa725, R25, R26, R27, R28, R29, R22, R23, R24)
   210  	SHA512ROUND1(30, 0x06ca6351e003826f, R24, R25, R26, R27, R28, R29, R22, R23)
   211  	SHA512ROUND1(31, 0x142929670a0e6e70, R23, R24, R25, R26, R27, R28, R29, R22)
   212  	SHA512ROUND1(32, 0x27b70a8546d22ffc, R22, R23, R24, R25, R26, R27, R28, R29)
   213  	SHA512ROUND1(33, 0x2e1b21385c26c926, R29, R22, R23, R24, R25, R26, R27, R28)
   214  	SHA512ROUND1(34, 0x4d2c6dfc5ac42aed, R28, R29, R22, R23, R24, R25, R26, R27)
   215  	SHA512ROUND1(35, 0x53380d139d95b3df, R27, R28, R29, R22, R23, R24, R25, R26)
   216  	SHA512ROUND1(36, 0x650a73548baf63de, R26, R27, R28, R29, R22, R23, R24, R25)
   217  	SHA512ROUND1(37, 0x766a0abb3c77b2a8, R25, R26, R27, R28, R29, R22, R23, R24)
   218  	SHA512ROUND1(38, 0x81c2c92e47edaee6, R24, R25, R26, R27, R28, R29, R22, R23)
   219  	SHA512ROUND1(39, 0x92722c851482353b, R23, R24, R25, R26, R27, R28, R29, R22)
   220  	SHA512ROUND1(40, 0xa2bfe8a14cf10364, R22, R23, R24, R25, R26, R27, R28, R29)
   221  	SHA512ROUND1(41, 0xa81a664bbc423001, R29, R22, R23, R24, R25, R26, R27, R28)
   222  	SHA512ROUND1(42, 0xc24b8b70d0f89791, R28, R29, R22, R23, R24, R25, R26, R27)
   223  	SHA512ROUND1(43, 0xc76c51a30654be30, R27, R28, R29, R22, R23, R24, R25, R26)
   224  	SHA512ROUND1(44, 0xd192e819d6ef5218, R26, R27, R28, R29, R22, R23, R24, R25)
   225  	SHA512ROUND1(45, 0xd69906245565a910, R25, R26, R27, R28, R29, R22, R23, R24)
   226  	SHA512ROUND1(46, 0xf40e35855771202a, R24, R25, R26, R27, R28, R29, R22, R23)
   227  	SHA512ROUND1(47, 0x106aa07032bbd1b8, R23, R24, R25, R26, R27, R28, R29, R22)
   228  	SHA512ROUND1(48, 0x19a4c116b8d2d0c8, R22, R23, R24, R25, R26, R27, R28, R29)
   229  	SHA512ROUND1(49, 0x1e376c085141ab53, R29, R22, R23, R24, R25, R26, R27, R28)
   230  	SHA512ROUND1(50, 0x2748774cdf8eeb99, R28, R29, R22, R23, R24, R25, R26, R27)
   231  	SHA512ROUND1(51, 0x34b0bcb5e19b48a8, R27, R28, R29, R22, R23, R24, R25, R26)
   232  	SHA512ROUND1(52, 0x391c0cb3c5c95a63, R26, R27, R28, R29, R22, R23, R24, R25)
   233  	SHA512ROUND1(53, 0x4ed8aa4ae3418acb, R25, R26, R27, R28, R29, R22, R23, R24)
   234  	SHA512ROUND1(54, 0x5b9cca4f7763e373, R24, R25, R26, R27, R28, R29, R22, R23)
   235  	SHA512ROUND1(55, 0x682e6ff3d6b2b8a3, R23, R24, R25, R26, R27, R28, R29, R22)
   236  	SHA512ROUND1(56, 0x748f82ee5defb2fc, R22, R23, R24, R25, R26, R27, R28, R29)
   237  	SHA512ROUND1(57, 0x78a5636f43172f60, R29, R22, R23, R24, R25, R26, R27, R28)
   238  	SHA512ROUND1(58, 0x84c87814a1f0ab72, R28, R29, R22, R23, R24, R25, R26, R27)
   239  	SHA512ROUND1(59, 0x8cc702081a6439ec, R27, R28, R29, R22, R23, R24, R25, R26)
   240  	SHA512ROUND1(60, 0x90befffa23631e28, R26, R27, R28, R29, R22, R23, R24, R25)
   241  	SHA512ROUND1(61, 0xa4506cebde82bde9, R25, R26, R27, R28, R29, R22, R23, R24)
   242  	SHA512ROUND1(62, 0xbef9a3f7b2c67915, R24, R25, R26, R27, R28, R29, R22, R23)
   243  	SHA512ROUND1(63, 0xc67178f2e372532b, R23, R24, R25, R26, R27, R28, R29, R22)
   244  	SHA512ROUND1(64, 0xca273eceea26619c, R22, R23, R24, R25, R26, R27, R28, R29)
   245  	SHA512ROUND1(65, 0xd186b8c721c0c207, R29, R22, R23, R24, R25, R26, R27, R28)
   246  	SHA512ROUND1(66, 0xeada7dd6cde0eb1e, R28, R29, R22, R23, R24, R25, R26, R27)
   247  	SHA512ROUND1(67, 0xf57d4f7fee6ed178, R27, R28, R29, R22, R23, R24, R25, R26)
   248  	SHA512ROUND1(68, 0x06f067aa72176fba, R26, R27, R28, R29, R22, R23, R24, R25)
   249  	SHA512ROUND1(69, 0x0a637dc5a2c898a6, R25, R26, R27, R28, R29, R22, R23, R24)
   250  	SHA512ROUND1(70, 0x113f9804bef90dae, R24, R25, R26, R27, R28, R29, R22, R23)
   251  	SHA512ROUND1(71, 0x1b710b35131c471b, R23, R24, R25, R26, R27, R28, R29, R22)
   252  	SHA512ROUND1(72, 0x28db77f523047d84, R22, R23, R24, R25, R26, R27, R28, R29)
   253  	SHA512ROUND1(73, 0x32caab7b40c72493, R29, R22, R23, R24, R25, R26, R27, R28)
   254  	SHA512ROUND1(74, 0x3c9ebe0a15c9bebc, R28, R29, R22, R23, R24, R25, R26, R27)
   255  	SHA512ROUND1(75, 0x431d67c49c100d4c, R27, R28, R29, R22, R23, R24, R25, R26)
   256  	SHA512ROUND1(76, 0x4cc5d4becb3e42b6, R26, R27, R28, R29, R22, R23, R24, R25)
   257  	SHA512ROUND1(77, 0x597f299cfc657e2a, R25, R26, R27, R28, R29, R22, R23, R24)
   258  	SHA512ROUND1(78, 0x5fcb6fab3ad6faec, R24, R25, R26, R27, R28, R29, R22, R23)
   259  	SHA512ROUND1(79, 0x6c44198c4a475817, R23, R24, R25, R26, R27, R28, R29, R22)
   260  
        	// 80 rounds is a multiple of 8, so a..h are back in R22..R29.
        	// Add the previous hash value to each and store the sum; the sums
        	// stay in R22..R29 as the working variables for the next block.
   261  	MOVD	dig+0(FP), R9			// R9 held the schedule pointer; reload dig
   262  	MOVD	(0*8)(R9), R21
   263  	ADD	R21, R22	// H0 = a + H0
   264  	MOVD	R22, (0*8)(R9)
   265  	MOVD	(1*8)(R9), R21
   266  	ADD	R21, R23	// H1 = b + H1
   267  	MOVD	R23, (1*8)(R9)
   268  	MOVD	(2*8)(R9), R21
   269  	ADD	R21, R24	// H2 = c + H2
   270  	MOVD	R24, (2*8)(R9)
   271  	MOVD	(3*8)(R9), R21
   272  	ADD	R21, R25	// H3 = d + H3
   273  	MOVD	R25, (3*8)(R9)
   274  	MOVD	(4*8)(R9), R21
   275  	ADD	R21, R26	// H4 = e + H4
   276  	MOVD	R26, (4*8)(R9)
   277  	MOVD	(5*8)(R9), R21
   278  	ADD	R21, R27	// H5 = f + H5
   279  	MOVD	R27, (5*8)(R9)
   280  	MOVD	(6*8)(R9), R21
   281  	ADD	R21, R28	// H6 = g + H6
   282  	MOVD	R28, (6*8)(R9)
   283  	MOVD	(7*8)(R9), R21
   284  	ADD	R21, R29	// H7 = h + H7
   285  	MOVD	R29, (7*8)(R9)
   286  
   287  	ADD	$128, R6			// advance to the next 128-byte block
   288  	MOVD	640(R1), R21			// reload the end pointer saved at entry
   289  	CMPU	R6, R21
   290  	BLT	loop				// more complete blocks remain
   291  
   292  end:
   293  	RET