github.com/riscv/riscv-go@v0.0.0-20200123204226-124ebd6fcc8e/src/crypto/sha512/sha512block_amd64.s (about)

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "textflag.h"
     6  
     7  // SHA512 block routine. See sha512block.go for Go equivalent.
     8  //
     9  // The algorithm is detailed in FIPS 180-4:
    10  //
    11  //  http://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
    12  //
    13  // Wt = Mt; for 0 <= t <= 15
    14  // Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 79
    15  //
    16  // a = H0
    17  // b = H1
    18  // c = H2
    19  // d = H3
    20  // e = H4
    21  // f = H5
    22  // g = H6
    23  // h = H7
    24  //
    25  // for t = 0 to 79 {
    26  //    T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
    27  //    T2 = BIGSIGMA0(a) + Maj(a,b,c)
    28  //    h = g
    29  //    g = f
    30  //    f = e
    31  //    e = d + T1
    32  //    d = c
    33  //    c = b
    34  //    b = a
    35  //    a = T1 + T2
    36  // }
    37  //
    38  // H0 = a + H0
    39  // H1 = b + H1
    40  // H2 = c + H2
    41  // H3 = d + H3
    42  // H4 = e + H4
    43  // H5 = f + H5
    44  // H6 = g + H6
    45  // H7 = h + H7
    46  
    47  // Message schedule for rounds 0 <= t <= 15: Wt = Mt.
        // Loads the 64-bit word at offset t*8 from SI, reverses its byte order and
        // stores it at offset t*8 from BP. The word is also left in AX; no other
        // register is written.
    48  #define MSGSCHEDULE0(t) \
    49  	MOVQ	(t*8)(SI), AX; \
    50  	BSWAPQ	AX; \
    51  	MOVQ	AX, (t*8)(BP)
    52  
    53  // Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 79
    54  //   SIGMA0(x) = ROTR(1,x) XOR ROTR(8,x) XOR SHR(7,x)
    55  //   SIGMA1(x) = ROTR(19,x) XOR ROTR(61,x) XOR SHR(6,x)
        //
        // Reads Wt-2, Wt-15, Wt-7 and Wt-16 from the schedule array at BP, builds
        // SIGMA1(Wt-2) + Wt-7 in AX and SIGMA0(Wt-15) + Wt-16 in BX, then stores
        // their sum at offset index*8 from BP and leaves it in AX.
        // Writes AX, BX, CX and DX; the two sigma computations are interleaved and
        // share CX and DX as temporaries.
    56  #define MSGSCHEDULE1(index) \
    57  	MOVQ	((index-2)*8)(BP), AX; \
    58  	MOVQ	AX, CX; \
    59  	RORQ	$19, AX; \
    60  	MOVQ	CX, DX; \
    61  	RORQ	$61, CX; \
    62  	SHRQ	$6, DX; \
    63  	MOVQ	((index-15)*8)(BP), BX; \
    64  	XORQ	CX, AX; \
    65  	MOVQ	BX, CX; \
    66  	XORQ	DX, AX; \
    67  	RORQ	$1, BX; \
    68  	MOVQ	CX, DX; \
    69  	SHRQ	$7, DX; \
    70  	RORQ	$8, CX; \
    71  	ADDQ	((index-7)*8)(BP), AX; \
    72  	XORQ	CX, BX; \
    73  	XORQ	DX, BX; \
    74  	ADDQ	((index-16)*8)(BP), BX; \
    75  	ADDQ	BX, AX; \
    76  	MOVQ	AX, ((index)*8)(BP)
    77  
    78  // Calculate T1 in AX - uses AX, CX and DX registers.
    79  // h is also used as an accumulator. Wt is passed in AX.
    80  //   T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + Kt + Wt
    81  //     BIGSIGMA1(x) = ROTR(14,x) XOR ROTR(18,x) XOR ROTR(41,x)
    82  //     Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
        //
        // Kt is the immediate `const`; it is loaded into DX and added to h from there.
        // Wt, Kt and BIGSIGMA1(e) are added into h one after another, Ch(e, f, g) is
        // built in AX, and the final add leaves T1 in AX. On exit h therefore holds
        // h + Wt + Kt + BIGSIGMA1(e), i.e. T1 without the Ch term.
        // e, f and g are only read.
    83  #define SHA512T1(const, e, f, g, h) \
    84  	MOVQ	$const, DX; \
    85  	ADDQ	AX, h; \
    86  	MOVQ	e, AX; \
    87  	ADDQ	DX, h; \
    88  	MOVQ	e, CX; \
    89  	RORQ	$14, AX; \
    90  	MOVQ	e, DX; \
    91  	RORQ	$18, CX; \
    92  	XORQ	CX, AX; \
    93  	MOVQ	e, CX; \
    94  	RORQ	$41, DX; \
    95  	ANDQ	f, CX; \
    96  	XORQ	AX, DX; \
    97  	MOVQ	e, AX; \
    98  	NOTQ	AX; \
    99  	ADDQ	DX, h; \
   100  	ANDQ	g, AX; \
   101  	XORQ	CX, AX; \
   102  	ADDQ	h, AX
   103  
   104  // Calculate T2 in BX - uses BX, CX, DX and DI registers.
   105  //   T2 = BIGSIGMA0(a) + Maj(a, b, c)
   106  //     BIGSIGMA0(x) = ROTR(28,x) XOR ROTR(34,x) XOR ROTR(39,x)
   107  //     Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
        //
        // BIGSIGMA0(a) is built in DI and Maj(a, b, c) in BX, with CX and DX as
        // temporaries; the two computations are interleaved.
        // a, b and c are only read, and AX is never written, so a T1 value left in
        // AX by SHA512T1 is still there afterwards.
   108  #define SHA512T2(a, b, c) \
   109  	MOVQ	a, DI; \
   110  	MOVQ	c, BX; \
   111  	RORQ	$28, DI; \
   112  	MOVQ	a, DX; \
   113  	ANDQ	b, BX; \
   114  	RORQ	$34, DX; \
   115  	MOVQ	a, CX; \
   116  	ANDQ	c, CX; \
   117  	XORQ	DX, DI; \
   118  	XORQ	CX, BX; \
   119  	MOVQ	a, DX; \
   120  	MOVQ	b, CX; \
   121  	RORQ	$39, DX; \
   122  	ANDQ	a, CX; \
   123  	XORQ	CX, BX; \
   124  	XORQ	DX, DI; \
   125  	ADDQ	DI, BX
   126  
   127  // One compression round. T1 is computed in AX and T2 in BX, then
   128  //   d = d + T1    (this is the next round's e)
        //   h = T1 + T2   (this is the next round's a)
        // The results are written over d and h so that the caller only has to rotate
        // the register arguments for the following round.
        // k is the round constant Kt; Wt must already be in AX. t is not referenced
        // by this macro itself.
   129  #define SHA512ROUND(t, k, a, b, c, d, e, f, g, h) \
   130  	SHA512T1(k, e, f, g, h); \
   131  	SHA512T2(a, b, c); \
   132  	MOVQ	BX, h; \
   133  	ADDQ	AX, d; \
   134  	ADDQ	AX, h
   135  
        // Round for 0 <= t <= 15: MSGSCHEDULE0 produces Wt, then the round runs.
   136  #define SHA512ROUND0(t, k, a, b, c, d, e, f, g, h) \
   137  	MSGSCHEDULE0(t); \
   138  	SHA512ROUND(t, k, a, b, c, d, e, f, g, h)
   139  
        // Round for 16 <= t <= 79: MSGSCHEDULE1 produces Wt, then the round runs.
   140  #define SHA512ROUND1(t, k, a, b, c, d, e, f, g, h) \
   141  	MSGSCHEDULE1(t); \
   142  	SHA512ROUND(t, k, a, b, c, d, e, f, g, h)
   143  
   144  TEXT ·block(SB),0,$648-32
        	// Hashes every complete 128-byte block of the input and updates the
        	// eight 64-bit state words at dig in place. Trailing bytes that do not
        	// fill a block are ignored.
        	// Arguments: dig at 0(FP), input base pointer at 8(FP), input length
        	// at 16(FP).
        	// Frame: 0-639(SP) is the 80-entry message schedule, 640(SP) holds the
        	// end-of-input pointer.
        	// Registers: SI = current block, BP = dig or schedule base,
        	// R8-R15 = working variables a-h, AX/BX/CX/DX/DI = scratch.
   145  	MOVQ	p_base+8(FP), SI	// SI = start of input
   146  	MOVQ	p_len+16(FP), DX	// DX = input length in bytes
   147  	SHRQ	$7, DX
   148  	SHLQ	$7, DX			// round the length down to a multiple of 128
   149  
   150  	LEAQ	(SI)(DX*1), DI		// DI = end of the last complete block
   151  	MOVQ	DI, 640(SP)		// kept on the stack; DI is scratch in the rounds
   152  	CMPQ	SI, DI
   153  	JEQ	end			// no complete block, nothing to do
   154  
   155  	MOVQ	dig+0(FP), BP		// BP = hash state H0..H7
   156  	MOVQ	(0*8)(BP), R8		// a = H0
   157  	MOVQ	(1*8)(BP), R9		// b = H1
   158  	MOVQ	(2*8)(BP), R10		// c = H2
   159  	MOVQ	(3*8)(BP), R11		// d = H3
   160  	MOVQ	(4*8)(BP), R12		// e = H4
   161  	MOVQ	(5*8)(BP), R13		// f = H5
   162  	MOVQ	(6*8)(BP), R14		// g = H6
   163  	MOVQ	(7*8)(BP), R15		// h = H7
   164  
   165  loop:
   166  	MOVQ	SP, BP			// message schedule
   167  
        	// Rounds 0-15: Wt is read from the input block at SI. Each round shifts
        	// the eight register arguments one place instead of moving values.
   168  	SHA512ROUND0(0, 0x428a2f98d728ae22, R8, R9, R10, R11, R12, R13, R14, R15)
   169  	SHA512ROUND0(1, 0x7137449123ef65cd, R15, R8, R9, R10, R11, R12, R13, R14)
   170  	SHA512ROUND0(2, 0xb5c0fbcfec4d3b2f, R14, R15, R8, R9, R10, R11, R12, R13)
   171  	SHA512ROUND0(3, 0xe9b5dba58189dbbc, R13, R14, R15, R8, R9, R10, R11, R12)
   172  	SHA512ROUND0(4, 0x3956c25bf348b538, R12, R13, R14, R15, R8, R9, R10, R11)
   173  	SHA512ROUND0(5, 0x59f111f1b605d019, R11, R12, R13, R14, R15, R8, R9, R10)
   174  	SHA512ROUND0(6, 0x923f82a4af194f9b, R10, R11, R12, R13, R14, R15, R8, R9)
   175  	SHA512ROUND0(7, 0xab1c5ed5da6d8118, R9, R10, R11, R12, R13, R14, R15, R8)
   176  	SHA512ROUND0(8, 0xd807aa98a3030242, R8, R9, R10, R11, R12, R13, R14, R15)
   177  	SHA512ROUND0(9, 0x12835b0145706fbe, R15, R8, R9, R10, R11, R12, R13, R14)
   178  	SHA512ROUND0(10, 0x243185be4ee4b28c, R14, R15, R8, R9, R10, R11, R12, R13)
   179  	SHA512ROUND0(11, 0x550c7dc3d5ffb4e2, R13, R14, R15, R8, R9, R10, R11, R12)
   180  	SHA512ROUND0(12, 0x72be5d74f27b896f, R12, R13, R14, R15, R8, R9, R10, R11)
   181  	SHA512ROUND0(13, 0x80deb1fe3b1696b1, R11, R12, R13, R14, R15, R8, R9, R10)
   182  	SHA512ROUND0(14, 0x9bdc06a725c71235, R10, R11, R12, R13, R14, R15, R8, R9)
   183  	SHA512ROUND0(15, 0xc19bf174cf692694, R9, R10, R11, R12, R13, R14, R15, R8)
   184  
        	// Rounds 16-79: Wt is derived from earlier schedule entries.
   185  	SHA512ROUND1(16, 0xe49b69c19ef14ad2, R8, R9, R10, R11, R12, R13, R14, R15)
   186  	SHA512ROUND1(17, 0xefbe4786384f25e3, R15, R8, R9, R10, R11, R12, R13, R14)
   187  	SHA512ROUND1(18, 0x0fc19dc68b8cd5b5, R14, R15, R8, R9, R10, R11, R12, R13)
   188  	SHA512ROUND1(19, 0x240ca1cc77ac9c65, R13, R14, R15, R8, R9, R10, R11, R12)
   189  	SHA512ROUND1(20, 0x2de92c6f592b0275, R12, R13, R14, R15, R8, R9, R10, R11)
   190  	SHA512ROUND1(21, 0x4a7484aa6ea6e483, R11, R12, R13, R14, R15, R8, R9, R10)
   191  	SHA512ROUND1(22, 0x5cb0a9dcbd41fbd4, R10, R11, R12, R13, R14, R15, R8, R9)
   192  	SHA512ROUND1(23, 0x76f988da831153b5, R9, R10, R11, R12, R13, R14, R15, R8)
   193  	SHA512ROUND1(24, 0x983e5152ee66dfab, R8, R9, R10, R11, R12, R13, R14, R15)
   194  	SHA512ROUND1(25, 0xa831c66d2db43210, R15, R8, R9, R10, R11, R12, R13, R14)
   195  	SHA512ROUND1(26, 0xb00327c898fb213f, R14, R15, R8, R9, R10, R11, R12, R13)
   196  	SHA512ROUND1(27, 0xbf597fc7beef0ee4, R13, R14, R15, R8, R9, R10, R11, R12)
   197  	SHA512ROUND1(28, 0xc6e00bf33da88fc2, R12, R13, R14, R15, R8, R9, R10, R11)
   198  	SHA512ROUND1(29, 0xd5a79147930aa725, R11, R12, R13, R14, R15, R8, R9, R10)
   199  	SHA512ROUND1(30, 0x06ca6351e003826f, R10, R11, R12, R13, R14, R15, R8, R9)
   200  	SHA512ROUND1(31, 0x142929670a0e6e70, R9, R10, R11, R12, R13, R14, R15, R8)
   201  	SHA512ROUND1(32, 0x27b70a8546d22ffc, R8, R9, R10, R11, R12, R13, R14, R15)
   202  	SHA512ROUND1(33, 0x2e1b21385c26c926, R15, R8, R9, R10, R11, R12, R13, R14)
   203  	SHA512ROUND1(34, 0x4d2c6dfc5ac42aed, R14, R15, R8, R9, R10, R11, R12, R13)
   204  	SHA512ROUND1(35, 0x53380d139d95b3df, R13, R14, R15, R8, R9, R10, R11, R12)
   205  	SHA512ROUND1(36, 0x650a73548baf63de, R12, R13, R14, R15, R8, R9, R10, R11)
   206  	SHA512ROUND1(37, 0x766a0abb3c77b2a8, R11, R12, R13, R14, R15, R8, R9, R10)
   207  	SHA512ROUND1(38, 0x81c2c92e47edaee6, R10, R11, R12, R13, R14, R15, R8, R9)
   208  	SHA512ROUND1(39, 0x92722c851482353b, R9, R10, R11, R12, R13, R14, R15, R8)
   209  	SHA512ROUND1(40, 0xa2bfe8a14cf10364, R8, R9, R10, R11, R12, R13, R14, R15)
   210  	SHA512ROUND1(41, 0xa81a664bbc423001, R15, R8, R9, R10, R11, R12, R13, R14)
   211  	SHA512ROUND1(42, 0xc24b8b70d0f89791, R14, R15, R8, R9, R10, R11, R12, R13)
   212  	SHA512ROUND1(43, 0xc76c51a30654be30, R13, R14, R15, R8, R9, R10, R11, R12)
   213  	SHA512ROUND1(44, 0xd192e819d6ef5218, R12, R13, R14, R15, R8, R9, R10, R11)
   214  	SHA512ROUND1(45, 0xd69906245565a910, R11, R12, R13, R14, R15, R8, R9, R10)
   215  	SHA512ROUND1(46, 0xf40e35855771202a, R10, R11, R12, R13, R14, R15, R8, R9)
   216  	SHA512ROUND1(47, 0x106aa07032bbd1b8, R9, R10, R11, R12, R13, R14, R15, R8)
   217  	SHA512ROUND1(48, 0x19a4c116b8d2d0c8, R8, R9, R10, R11, R12, R13, R14, R15)
   218  	SHA512ROUND1(49, 0x1e376c085141ab53, R15, R8, R9, R10, R11, R12, R13, R14)
   219  	SHA512ROUND1(50, 0x2748774cdf8eeb99, R14, R15, R8, R9, R10, R11, R12, R13)
   220  	SHA512ROUND1(51, 0x34b0bcb5e19b48a8, R13, R14, R15, R8, R9, R10, R11, R12)
   221  	SHA512ROUND1(52, 0x391c0cb3c5c95a63, R12, R13, R14, R15, R8, R9, R10, R11)
   222  	SHA512ROUND1(53, 0x4ed8aa4ae3418acb, R11, R12, R13, R14, R15, R8, R9, R10)
   223  	SHA512ROUND1(54, 0x5b9cca4f7763e373, R10, R11, R12, R13, R14, R15, R8, R9)
   224  	SHA512ROUND1(55, 0x682e6ff3d6b2b8a3, R9, R10, R11, R12, R13, R14, R15, R8)
   225  	SHA512ROUND1(56, 0x748f82ee5defb2fc, R8, R9, R10, R11, R12, R13, R14, R15)
   226  	SHA512ROUND1(57, 0x78a5636f43172f60, R15, R8, R9, R10, R11, R12, R13, R14)
   227  	SHA512ROUND1(58, 0x84c87814a1f0ab72, R14, R15, R8, R9, R10, R11, R12, R13)
   228  	SHA512ROUND1(59, 0x8cc702081a6439ec, R13, R14, R15, R8, R9, R10, R11, R12)
   229  	SHA512ROUND1(60, 0x90befffa23631e28, R12, R13, R14, R15, R8, R9, R10, R11)
   230  	SHA512ROUND1(61, 0xa4506cebde82bde9, R11, R12, R13, R14, R15, R8, R9, R10)
   231  	SHA512ROUND1(62, 0xbef9a3f7b2c67915, R10, R11, R12, R13, R14, R15, R8, R9)
   232  	SHA512ROUND1(63, 0xc67178f2e372532b, R9, R10, R11, R12, R13, R14, R15, R8)
   233  	SHA512ROUND1(64, 0xca273eceea26619c, R8, R9, R10, R11, R12, R13, R14, R15)
   234  	SHA512ROUND1(65, 0xd186b8c721c0c207, R15, R8, R9, R10, R11, R12, R13, R14)
   235  	SHA512ROUND1(66, 0xeada7dd6cde0eb1e, R14, R15, R8, R9, R10, R11, R12, R13)
   236  	SHA512ROUND1(67, 0xf57d4f7fee6ed178, R13, R14, R15, R8, R9, R10, R11, R12)
   237  	SHA512ROUND1(68, 0x06f067aa72176fba, R12, R13, R14, R15, R8, R9, R10, R11)
   238  	SHA512ROUND1(69, 0x0a637dc5a2c898a6, R11, R12, R13, R14, R15, R8, R9, R10)
   239  	SHA512ROUND1(70, 0x113f9804bef90dae, R10, R11, R12, R13, R14, R15, R8, R9)
   240  	SHA512ROUND1(71, 0x1b710b35131c471b, R9, R10, R11, R12, R13, R14, R15, R8)
   241  	SHA512ROUND1(72, 0x28db77f523047d84, R8, R9, R10, R11, R12, R13, R14, R15)
   242  	SHA512ROUND1(73, 0x32caab7b40c72493, R15, R8, R9, R10, R11, R12, R13, R14)
   243  	SHA512ROUND1(74, 0x3c9ebe0a15c9bebc, R14, R15, R8, R9, R10, R11, R12, R13)
   244  	SHA512ROUND1(75, 0x431d67c49c100d4c, R13, R14, R15, R8, R9, R10, R11, R12)
   245  	SHA512ROUND1(76, 0x4cc5d4becb3e42b6, R12, R13, R14, R15, R8, R9, R10, R11)
   246  	SHA512ROUND1(77, 0x597f299cfc657e2a, R11, R12, R13, R14, R15, R8, R9, R10)
   247  	SHA512ROUND1(78, 0x5fcb6fab3ad6faec, R10, R11, R12, R13, R14, R15, R8, R9)
   248  	SHA512ROUND1(79, 0x6c44198c4a475817, R9, R10, R11, R12, R13, R14, R15, R8)
   249  
        	// 80 rounds is a whole number of 8-step rotations, so a-h are back in
        	// R8-R15. Each register is left holding the updated H value, which is
        	// also the starting working variable for the next block, so nothing
        	// is reloaded at the top of the loop.
   250  	MOVQ	dig+0(FP), BP		// BP pointed at the schedule; reload dig
   251  	ADDQ	(0*8)(BP), R8	// H0 = a + H0
   252  	MOVQ	R8, (0*8)(BP)
   253  	ADDQ	(1*8)(BP), R9	// H1 = b + H1
   254  	MOVQ	R9, (1*8)(BP)
   255  	ADDQ	(2*8)(BP), R10	// H2 = c + H2
   256  	MOVQ	R10, (2*8)(BP)
   257  	ADDQ	(3*8)(BP), R11	// H3 = d + H3
   258  	MOVQ	R11, (3*8)(BP)
   259  	ADDQ	(4*8)(BP), R12	// H4 = e + H4
   260  	MOVQ	R12, (4*8)(BP)
   261  	ADDQ	(5*8)(BP), R13	// H5 = f + H5
   262  	MOVQ	R13, (5*8)(BP)
   263  	ADDQ	(6*8)(BP), R14	// H6 = g + H6
   264  	MOVQ	R14, (6*8)(BP)
   265  	ADDQ	(7*8)(BP), R15	// H7 = h + H7
   266  	MOVQ	R15, (7*8)(BP)
   267  
   268  	ADDQ	$128, SI		// advance to the next 128-byte block
   269  	CMPQ	SI, 640(SP)
   270  	JB	loop			// continue while SI is below the saved end pointer
   271  
   272  end:
   273  	RET