github.com/akaros/go-akaros@v0.0.0-20181004170632-85005d477eab/src/crypto/sha256/sha256block_amd64.s (about)

     1  // Copyright 2013 The Go Authors.  All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "textflag.h"
     6  
     7  // SHA256 block routine. See sha256block.go for Go equivalent.
     8  //
     9  // The algorithm is detailed in FIPS 180-4:
    10  //
    11  //  http://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
    12  //
    13  // Wt = Mt; for 0 <= t <= 15
    14  // Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 63
    15  //
    16  // a = H0
    17  // b = H1
    18  // c = H2
    19  // d = H3
    20  // e = H4
    21  // f = H5
    22  // g = H6
    23  // h = H7
    24  //
    25  // for t = 0 to 63 {
    26  //    T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
    27  //    T2 = BIGSIGMA0(a) + Maj(a,b,c)
    28  //    h = g
    29  //    g = f
    30  //    f = e
    31  //    e = d + T1
    32  //    d = c
    33  //    c = b
    34  //    b = a
    35  //    a = T1 + T2
    36  // }
    37  //
    38  // H0 = a + H0
    39  // H1 = b + H1
    40  // H2 = c + H2
    41  // H3 = d + H3
    42  // H4 = e + H4
    43  // H5 = f + H5
    44  // H6 = g + H6
    45  // H7 = h + H7
    46  
    47  // Wt = Mt; for 0 <= t <= 15
        //
        // MSGSCHEDULE0(index) loads the 32-bit message word at byte offset
        // index*4 from SI, reverses its byte order with BSWAPL (FIPS 180-4
        // message words are big-endian), and stores the result at byte offset
        // index*4 from BP, the base of the message schedule.
        //
        // On exit AX still holds the swapped word, which is where SHA256T1
        // expects Wt. AX is the only register written.
    48  #define MSGSCHEDULE0(index) \
    49  	MOVL	(index*4)(SI), AX; \
    50  	BSWAPL	AX; \
    51  	MOVL	AX, (index*4)(BP)
    52  
    53  // Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 63
    54  //   SIGMA0(x) = ROTR(7,x) XOR ROTR(18,x) XOR SHR(3,x)
    55  //   SIGMA1(x) = ROTR(17,x) XOR ROTR(19,x) XOR SHR(10,x)
        //
        // MSGSCHEDULE1(index) reads the four earlier schedule words from the
        // table at BP, stores the new word at byte offset index*4 from BP, and
        // leaves it in AX, which is where SHA256T1 expects Wt.
        //
        // Register use:
        //   AX  accumulates SIGMA1(Wt-2), then + Wt-7, then the final sum
        //   BX  accumulates SIGMA0(Wt-15), then + Wt-16
        //   CX, DX  scratch for the second rotate and the shift of each sigma
        //
        // The SIGMA1 and SIGMA0 computations are interleaved and both reuse CX
        // and DX: each scratch value is XORed into its accumulator before the
        // register is reloaded for the other sigma, so the instruction order
        // must be kept when editing. Clobbers AX, BX, CX and DX.
    56  #define MSGSCHEDULE1(index) \
    57  	MOVL	((index-2)*4)(BP), AX; \
    58  	MOVL	AX, CX; \
    59  	RORL	$17, AX; \
    60  	MOVL	CX, DX; \
    61  	RORL	$19, CX; \
    62  	SHRL	$10, DX; \
    63  	MOVL	((index-15)*4)(BP), BX; \
    64  	XORL	CX, AX; \
    65  	MOVL	BX, CX; \
    66  	XORL	DX, AX; \
    67  	RORL	$7, BX; \
    68  	MOVL	CX, DX; \
    69  	SHRL	$3, DX; \
    70  	RORL	$18, CX; \
    71  	ADDL	((index-7)*4)(BP), AX; \
    72  	XORL	CX, BX; \
    73  	XORL	DX, BX; \
    74  	ADDL	((index-16)*4)(BP), BX; \
    75  	ADDL	BX, AX; \
    76  	MOVL	AX, ((index)*4)(BP)
    77  
    78  // Calculate T1 in AX - uses AX, CX and DX registers.
    79  // h is also used as an accumulator. Wt is passed in AX.
    80  //   T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + Kt + Wt
    81  //     BIGSIGMA1(x) = ROTR(6,x) XOR ROTR(11,x) XOR ROTR(25,x)
    82  //     Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
        //
        // Arguments: const is the round constant Kt (used as an immediate);
        // e, f, g and h are the registers holding those working variables.
        //
        // On entry: AX = Wt.
        // On exit:  AX = T1.
        //           h  = h + Wt + Kt + BIGSIGMA1(e), i.e. T1 without the Ch term;
        //                the caller is expected to overwrite h afterwards.
        //           e, f and g are only read.
        //
        // Wt is folded into h first so that AX is free to be reused as scratch.
        // BIGSIGMA1(e) is assembled in DX and Ch(e, f, g) in AX (with the
        // e AND f term held in CX); the two computations are interleaved, so
        // the instruction order must be kept when editing.
    83  #define SHA256T1(const, e, f, g, h) \
    84  	ADDL	AX, h; \
    85  	MOVL	e, AX; \
    86  	ADDL	$const, h; \
    87  	MOVL	e, CX; \
    88  	RORL	$6, AX; \
    89  	MOVL	e, DX; \
    90  	RORL	$11, CX; \
    91  	XORL	CX, AX; \
    92  	MOVL	e, CX; \
    93  	RORL	$25, DX; \
    94  	ANDL	f, CX; \
    95  	XORL	AX, DX; \
    96  	MOVL	e, AX; \
    97  	NOTL	AX; \
    98  	ADDL	DX, h; \
    99  	ANDL	g, AX; \
   100  	XORL	CX, AX; \
   101  	ADDL	h, AX
   102  
   103  // Calculate T2 in BX - uses BX, CX, DX and DI registers.
   104  //   T2 = BIGSIGMA0(a) + Maj(a, b, c)
   105  //     BIGSIGMA0(x) = ROTR(2,x) XOR ROTR(13,x) XOR ROTR(22,x)
   106  //     Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
        //
        // Arguments: a, b and c are the registers holding those working
        // variables; they are only read.
        //
        // On exit: BX = T2. AX is not touched, so the T1 value left there by
        // SHA256T1 survives this macro.
        //
        // BIGSIGMA0(a) is assembled in DI, with DX as scratch for the second and
        // third rotations. Maj(a, b, c) is assembled in BX as
        // (c AND b) XOR (a AND c) XOR (b AND a), with CX as scratch for the last
        // two terms. The two computations are interleaved, so the instruction
        // order must be kept when editing.
   107  #define SHA256T2(a, b, c) \
   108  	MOVL	a, DI; \
   109  	MOVL	c, BX; \
   110  	RORL	$2, DI; \
   111  	MOVL	a, DX; \
   112  	ANDL	b, BX; \
   113  	RORL	$13, DX; \
   114  	MOVL	a, CX; \
   115  	ANDL	c, CX; \
   116  	XORL	DX, DI; \
   117  	XORL	CX, BX; \
   118  	MOVL	a, DX; \
   119  	MOVL	b, CX; \
   120  	RORL	$22, DX; \
   121  	ANDL	a, CX; \
   122  	XORL	CX, BX; \
   123  	XORL	DX, DI; \
   124  	ADDL	DI, BX
   125  
   126  // Calculate T1 and T2, then e = d + T1 and a = T1 + T2.
   127  // The values for e and a are stored in d and h, ready for rotation.
        //
        // On entry: AX = Wt (left there by MSGSCHEDULE0 or MSGSCHEDULE1).
        // After SHA256T1, AX = T1; after SHA256T2, BX = T2.
        //   h = T2 + T1   overwrites the partial sum SHA256T1 left in h; this
        //                 register is the next round's a
        //   d = d + T1    this register is the next round's e
        //
        // The working variables are never moved between registers. Instead each
        // successive invocation passes the eight registers shifted by one
        // position (the register given as h becomes a, a becomes b, and so on).
        //
        // The index argument is not used by this macro; it is consumed by the
        // SHA256ROUND0/SHA256ROUND1 wrappers for the message schedule.
        // Clobbers AX, BX, CX, DX and DI.
   128  #define SHA256ROUND(index, const, a, b, c, d, e, f, g, h) \
   129  	SHA256T1(const, e, f, g, h); \
   130  	SHA256T2(a, b, c); \
   131  	MOVL	BX, h; \
   132  	ADDL	AX, d; \
   133  	ADDL	AX, h
   134  
        // One round for 0 <= t <= 15: MSGSCHEDULE0 takes Wt straight from the
        // message at SI (leaving it in AX and in the schedule at BP), then
        // SHA256ROUND performs the round with that Wt and round constant const.
   135  #define SHA256ROUND0(index, const, a, b, c, d, e, f, g, h) \
   136  	MSGSCHEDULE0(index); \
   137  	SHA256ROUND(index, const, a, b, c, d, e, f, g, h)
   138  
        // One round for 16 <= t <= 63: MSGSCHEDULE1 derives Wt from earlier
        // entries of the schedule at BP (leaving it in AX and in the schedule),
        // then SHA256ROUND performs the round with that Wt and round constant
        // const.
   139  #define SHA256ROUND1(index, const, a, b, c, d, e, f, g, h) \
   140  	MSGSCHEDULE1(index); \
   141  	SHA256ROUND(index, const, a, b, c, d, e, f, g, h)
   142  
        // block folds every complete 64-byte chunk of p into the eight 32-bit
        // hash words stored at the start of dig. p_len is rounded down to a
        // multiple of 64, so trailing bytes past the last full chunk are ignored;
        // if there is no full chunk, dig is not touched.
        //
        // Arguments (32 bytes): dig+0(FP), p_base+8(FP), p_len+16(FP). The last
        // 8 bytes are never read here (presumably the capacity of the slice p;
        // confirm against the Go declaration in sha256block.go).
        // NOTE(review): assumes the hash words H0..H7 are the first 32 bytes of
        // the object dig points to; confirm against the Go-side digest type.
        //
        // Frame (264 bytes):
        //   0(SP)..255(SP)   message schedule W[0..63], 64 words of 4 bytes
        //   256(SP)          pointer one past the last full chunk of p
        //
        // Register roles:
        //   SI               current 64-byte chunk of p
        //   BP               dig while loading/storing the hash words;
        //                    base of the message schedule (SP) during the rounds
        //   R8-R15           working variables a-h; roles shift by one per round
        //   AX, BX, CX, DX, DI   scratch for the schedule and round macros
   143  TEXT ·block(SB),0,$264-32
   144  	MOVQ	p_base+8(FP), SI
   145  	MOVQ	p_len+16(FP), DX
        	// Clear the low six bits: DX = p_len rounded down to a multiple of 64.
   146  	SHRQ	$6, DX
   147  	SHLQ	$6, DX
   148  
        	// DI = end of the last full chunk; keep it in the frame because DI and
        	// DX are used as scratch by the round macros.
   149  	LEAQ	(SI)(DX*1), DI
   150  	MOVQ	DI, 256(SP)
        	// Nothing to do when p holds no complete chunk.
   151  	CMPQ	SI, DI
   152  	JEQ	end
   153  
        	// Load the current hash words into the working registers.
   154  	MOVQ	dig+0(FP), BP
   155  	MOVL	(0*4)(BP), R8		// a = H0
   156  	MOVL	(1*4)(BP), R9		// b = H1
   157  	MOVL	(2*4)(BP), R10		// c = H2
   158  	MOVL	(3*4)(BP), R11		// d = H3
   159  	MOVL	(4*4)(BP), R12		// e = H4
   160  	MOVL	(5*4)(BP), R13		// f = H5
   161  	MOVL	(6*4)(BP), R14		// g = H6
   162  	MOVL	(7*4)(BP), R15		// h = H7
   163  
        // One iteration per 64-byte chunk. On entry R8-R15 hold a-h, which equal
        // the hash words currently stored in dig.
   164  loop:
   165  	MOVQ	SP, BP			// message schedule
   166  
        	// Rounds 0-15: Wt comes directly from the chunk at SI.
        	// Each line passes the registers shifted by one position relative to
        	// the previous line, which implements the h=g, g=f, ... b=a rotation.
   167  	SHA256ROUND0(0, 0x428a2f98, R8, R9, R10, R11, R12, R13, R14, R15)
   168  	SHA256ROUND0(1, 0x71374491, R15, R8, R9, R10, R11, R12, R13, R14)
   169  	SHA256ROUND0(2, 0xb5c0fbcf, R14, R15, R8, R9, R10, R11, R12, R13)
   170  	SHA256ROUND0(3, 0xe9b5dba5, R13, R14, R15, R8, R9, R10, R11, R12)
   171  	SHA256ROUND0(4, 0x3956c25b, R12, R13, R14, R15, R8, R9, R10, R11)
   172  	SHA256ROUND0(5, 0x59f111f1, R11, R12, R13, R14, R15, R8, R9, R10)
   173  	SHA256ROUND0(6, 0x923f82a4, R10, R11, R12, R13, R14, R15, R8, R9)
   174  	SHA256ROUND0(7, 0xab1c5ed5, R9, R10, R11, R12, R13, R14, R15, R8)
   175  	SHA256ROUND0(8, 0xd807aa98, R8, R9, R10, R11, R12, R13, R14, R15)
   176  	SHA256ROUND0(9, 0x12835b01, R15, R8, R9, R10, R11, R12, R13, R14)
   177  	SHA256ROUND0(10, 0x243185be, R14, R15, R8, R9, R10, R11, R12, R13)
   178  	SHA256ROUND0(11, 0x550c7dc3, R13, R14, R15, R8, R9, R10, R11, R12)
   179  	SHA256ROUND0(12, 0x72be5d74, R12, R13, R14, R15, R8, R9, R10, R11)
   180  	SHA256ROUND0(13, 0x80deb1fe, R11, R12, R13, R14, R15, R8, R9, R10)
   181  	SHA256ROUND0(14, 0x9bdc06a7, R10, R11, R12, R13, R14, R15, R8, R9)
   182  	SHA256ROUND0(15, 0xc19bf174, R9, R10, R11, R12, R13, R14, R15, R8)
   183  
        	// Rounds 16-63: Wt is derived from earlier schedule entries at BP.
   184  	SHA256ROUND1(16, 0xe49b69c1, R8, R9, R10, R11, R12, R13, R14, R15)
   185  	SHA256ROUND1(17, 0xefbe4786, R15, R8, R9, R10, R11, R12, R13, R14)
   186  	SHA256ROUND1(18, 0x0fc19dc6, R14, R15, R8, R9, R10, R11, R12, R13)
   187  	SHA256ROUND1(19, 0x240ca1cc, R13, R14, R15, R8, R9, R10, R11, R12)
   188  	SHA256ROUND1(20, 0x2de92c6f, R12, R13, R14, R15, R8, R9, R10, R11)
   189  	SHA256ROUND1(21, 0x4a7484aa, R11, R12, R13, R14, R15, R8, R9, R10)
   190  	SHA256ROUND1(22, 0x5cb0a9dc, R10, R11, R12, R13, R14, R15, R8, R9)
   191  	SHA256ROUND1(23, 0x76f988da, R9, R10, R11, R12, R13, R14, R15, R8)
   192  	SHA256ROUND1(24, 0x983e5152, R8, R9, R10, R11, R12, R13, R14, R15)
   193  	SHA256ROUND1(25, 0xa831c66d, R15, R8, R9, R10, R11, R12, R13, R14)
   194  	SHA256ROUND1(26, 0xb00327c8, R14, R15, R8, R9, R10, R11, R12, R13)
   195  	SHA256ROUND1(27, 0xbf597fc7, R13, R14, R15, R8, R9, R10, R11, R12)
   196  	SHA256ROUND1(28, 0xc6e00bf3, R12, R13, R14, R15, R8, R9, R10, R11)
   197  	SHA256ROUND1(29, 0xd5a79147, R11, R12, R13, R14, R15, R8, R9, R10)
   198  	SHA256ROUND1(30, 0x06ca6351, R10, R11, R12, R13, R14, R15, R8, R9)
   199  	SHA256ROUND1(31, 0x14292967, R9, R10, R11, R12, R13, R14, R15, R8)
   200  	SHA256ROUND1(32, 0x27b70a85, R8, R9, R10, R11, R12, R13, R14, R15)
   201  	SHA256ROUND1(33, 0x2e1b2138, R15, R8, R9, R10, R11, R12, R13, R14)
   202  	SHA256ROUND1(34, 0x4d2c6dfc, R14, R15, R8, R9, R10, R11, R12, R13)
   203  	SHA256ROUND1(35, 0x53380d13, R13, R14, R15, R8, R9, R10, R11, R12)
   204  	SHA256ROUND1(36, 0x650a7354, R12, R13, R14, R15, R8, R9, R10, R11)
   205  	SHA256ROUND1(37, 0x766a0abb, R11, R12, R13, R14, R15, R8, R9, R10)
   206  	SHA256ROUND1(38, 0x81c2c92e, R10, R11, R12, R13, R14, R15, R8, R9)
   207  	SHA256ROUND1(39, 0x92722c85, R9, R10, R11, R12, R13, R14, R15, R8)
   208  	SHA256ROUND1(40, 0xa2bfe8a1, R8, R9, R10, R11, R12, R13, R14, R15)
   209  	SHA256ROUND1(41, 0xa81a664b, R15, R8, R9, R10, R11, R12, R13, R14)
   210  	SHA256ROUND1(42, 0xc24b8b70, R14, R15, R8, R9, R10, R11, R12, R13)
   211  	SHA256ROUND1(43, 0xc76c51a3, R13, R14, R15, R8, R9, R10, R11, R12)
   212  	SHA256ROUND1(44, 0xd192e819, R12, R13, R14, R15, R8, R9, R10, R11)
   213  	SHA256ROUND1(45, 0xd6990624, R11, R12, R13, R14, R15, R8, R9, R10)
   214  	SHA256ROUND1(46, 0xf40e3585, R10, R11, R12, R13, R14, R15, R8, R9)
   215  	SHA256ROUND1(47, 0x106aa070, R9, R10, R11, R12, R13, R14, R15, R8)
   216  	SHA256ROUND1(48, 0x19a4c116, R8, R9, R10, R11, R12, R13, R14, R15)
   217  	SHA256ROUND1(49, 0x1e376c08, R15, R8, R9, R10, R11, R12, R13, R14)
   218  	SHA256ROUND1(50, 0x2748774c, R14, R15, R8, R9, R10, R11, R12, R13)
   219  	SHA256ROUND1(51, 0x34b0bcb5, R13, R14, R15, R8, R9, R10, R11, R12)
   220  	SHA256ROUND1(52, 0x391c0cb3, R12, R13, R14, R15, R8, R9, R10, R11)
   221  	SHA256ROUND1(53, 0x4ed8aa4a, R11, R12, R13, R14, R15, R8, R9, R10)
   222  	SHA256ROUND1(54, 0x5b9cca4f, R10, R11, R12, R13, R14, R15, R8, R9)
   223  	SHA256ROUND1(55, 0x682e6ff3, R9, R10, R11, R12, R13, R14, R15, R8)
   224  	SHA256ROUND1(56, 0x748f82ee, R8, R9, R10, R11, R12, R13, R14, R15)
   225  	SHA256ROUND1(57, 0x78a5636f, R15, R8, R9, R10, R11, R12, R13, R14)
   226  	SHA256ROUND1(58, 0x84c87814, R14, R15, R8, R9, R10, R11, R12, R13)
   227  	SHA256ROUND1(59, 0x8cc70208, R13, R14, R15, R8, R9, R10, R11, R12)
   228  	SHA256ROUND1(60, 0x90befffa, R12, R13, R14, R15, R8, R9, R10, R11)
   229  	SHA256ROUND1(61, 0xa4506ceb, R11, R12, R13, R14, R15, R8, R9, R10)
   230  	SHA256ROUND1(62, 0xbef9a3f7, R10, R11, R12, R13, R14, R15, R8, R9)
   231  	SHA256ROUND1(63, 0xc67178f2, R9, R10, R11, R12, R13, R14, R15, R8)
   232  
        	// 64 rounds is a multiple of 8, so the roles have come full circle:
        	// R8-R15 hold a-h again. Add the previous hash words and write the
        	// sums back; the registers then already hold a-h for the next chunk.
   233  	MOVQ	dig+0(FP), BP
   234  	ADDL	(0*4)(BP), R8	// H0 = a + H0
   235  	MOVL	R8, (0*4)(BP)
   236  	ADDL	(1*4)(BP), R9	// H1 = b + H1
   237  	MOVL	R9, (1*4)(BP)
   238  	ADDL	(2*4)(BP), R10	// H2 = c + H2
   239  	MOVL	R10, (2*4)(BP)
   240  	ADDL	(3*4)(BP), R11	// H3 = d + H3
   241  	MOVL	R11, (3*4)(BP)
   242  	ADDL	(4*4)(BP), R12	// H4 = e + H4
   243  	MOVL	R12, (4*4)(BP)
   244  	ADDL	(5*4)(BP), R13	// H5 = f + H5
   245  	MOVL	R13, (5*4)(BP)
   246  	ADDL	(6*4)(BP), R14	// H6 = g + H6
   247  	MOVL	R14, (6*4)(BP)
   248  	ADDL	(7*4)(BP), R15	// H7 = h + H7
   249  	MOVL	R15, (7*4)(BP)
   250  
        	// Advance to the next chunk; continue while SI is below the saved end
        	// pointer (JB is the unsigned comparison, as required for addresses).
   251  	ADDQ	$64, SI
   252  	CMPQ	SI, 256(SP)
   253  	JB	loop
   254  
   255  end:
   256  	RET