github.com/mit-dci/lit@v0.0.0-20221102210550-8c3d3b49f2ce/crypto/fastsha256/sha256block_amd64.s (about)

     1  // Copyright 2013 The Go Authors.  All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build !appengine
     6  
     7  //#include "../../../cmd/ld/textflag.h"
     8  // The include above is disabled; just use the #define for now since this
        // isn't in the main repo yet.
        // NOTE(review): the value 4 is presumably copied from textflag.h - confirm.
        // The TEXT directive for ·block in this file passes a literal 0 as its
        // flags, so NOSPLIT is not referenced there.
     9  #define NOSPLIT 4
    10  
    11  // SHA256 block routine. See sha256block.go for Go equivalent.
    12  //
    13  // The algorithm is detailed in FIPS 180-4:
    14  //
    15  //  http://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
    16  //
    17  // Wt = Mt; for 0 <= t <= 15
    18  // Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 63
    19  //
    20  // a = H0
    21  // b = H1
    22  // c = H2
    23  // d = H3
    24  // e = H4
    25  // f = H5
    26  // g = H6
    27  // h = H7
    28  //
    29  // for t = 0 to 63 {
    30  //    T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
    31  //    T2 = BIGSIGMA0(a) + Maj(a,b,c)
    32  //    h = g
    33  //    g = f
    34  //    f = e
    35  //    e = d + T1
    36  //    d = c
    37  //    c = b
    38  //    b = a
    39  //    a = T1 + T2
    40  // }
    41  //
    42  // H0 = a + H0
    43  // H1 = b + H1
    44  // H2 = c + H2
    45  // H3 = d + H3
    46  // H4 = e + H4
    47  // H5 = f + H5
    48  // H6 = g + H6
    49  // H7 = h + H7
    50  
    51  // Wt = Mt; for 0 <= t <= 15
        // MSGSCHEDULE0(index) loads the 32-bit word at byte offset index*4 from SI,
        // byte-swaps it, and stores it at byte offset index*4 from BP.
        // Clobbers AX; the stored word is also left in AX.
    52  #define MSGSCHEDULE0(index) \
    53  	MOVL	(index*4)(SI), AX; \
    54  	BSWAPL	AX; \
    55  	MOVL	AX, (index*4)(BP)
    56  
    57  // Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 63
    58  //   SIGMA0(x) = ROTR(7,x) XOR ROTR(18,x) XOR SHR(3,x)
    59  //   SIGMA1(x) = ROTR(17,x) XOR ROTR(19,x) XOR SHR(10,x)
        // MSGSCHEDULE1(index) reads the four earlier schedule words relative to BP
        // (4 bytes per word), builds SIGMA1(Wt-2) + Wt-7 in AX and
        // SIGMA0(Wt-15) + Wt-16 in BX, adds them, and stores the sum at byte
        // offset index*4 from BP.
        // Clobbers AX, BX, CX and DX; the stored word is also left in AX.
    60  #define MSGSCHEDULE1(index) \
    61  	MOVL	((index-2)*4)(BP), AX; \
    62  	MOVL	AX, CX; \
    63  	RORL	$17, AX; \
    64  	MOVL	CX, DX; \
    65  	RORL	$19, CX; \
    66  	SHRL	$10, DX; \
    67  	MOVL	((index-15)*4)(BP), BX; \
    68  	XORL	CX, AX; \
    69  	MOVL	BX, CX; \
    70  	XORL	DX, AX; \
    71  	RORL	$7, BX; \
    72  	MOVL	CX, DX; \
    73  	SHRL	$3, DX; \
    74  	RORL	$18, CX; \
    75  	ADDL	((index-7)*4)(BP), AX; \
    76  	XORL	CX, BX; \
    77  	XORL	DX, BX; \
    78  	ADDL	((index-16)*4)(BP), BX; \
    79  	ADDL	BX, AX; \
    80  	MOVL	AX, ((index)*4)(BP)
    81  
    82  // Calculate T1 in AX - uses AX, CX and DX registers.
    83  // h is also used as an accumulator. Wt is passed in AX.
    84  //   T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + Kt + Wt
    85  //     BIGSIGMA1(x) = ROTR(6,x) XOR ROTR(11,x) XOR ROTR(25,x)
    86  //     Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
        // const is the round constant Kt, added as an immediate.
        // e, f and g are only read. h is overwritten: on exit it holds
        // h + Wt + Kt + BIGSIGMA1(e), i.e. T1 without the Ch term.
        // On exit AX = T1; CX and DX hold leftovers.
    87  #define SHA256T1(const, e, f, g, h) \
    88  	ADDL	AX, h; \
    89  	MOVL	e, AX; \
    90  	ADDL	$const, h; \
    91  	MOVL	e, CX; \
    92  	RORL	$6, AX; \
    93  	MOVL	e, DX; \
    94  	RORL	$11, CX; \
    95  	XORL	CX, AX; \
    96  	MOVL	e, CX; \
    97  	RORL	$25, DX; \
    98  	ANDL	f, CX; \
    99  	XORL	AX, DX; \
   100  	MOVL	e, AX; \
   101  	NOTL	AX; \
   102  	ADDL	DX, h; \
   103  	ANDL	g, AX; \
   104  	XORL	CX, AX; \
   105  	ADDL	h, AX
   106  
   107  // Calculate T2 in BX - uses BX, CX, DX and DI registers.
   108  //   T2 = BIGSIGMA0(a) + Maj(a, b, c)
   109  //     BIGSIGMA0(x) = ROTR(2,x) XOR ROTR(13,x) XOR ROTR(22,x)
   110  //     Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
        // a, b and c are only read. BIGSIGMA0(a) is built in DI and Maj(a, b, c)
        // in BX; the two are summed into BX.
        // AX is not touched, so a value left in AX before this macro survives it.
        // DI is overwritten, so callers cannot keep a live value in DI across it.
   111  #define SHA256T2(a, b, c) \
   112  	MOVL	a, DI; \
   113  	MOVL	c, BX; \
   114  	RORL	$2, DI; \
   115  	MOVL	a, DX; \
   116  	ANDL	b, BX; \
   117  	RORL	$13, DX; \
   118  	MOVL	a, CX; \
   119  	ANDL	c, CX; \
   120  	XORL	DX, DI; \
   121  	XORL	CX, BX; \
   122  	MOVL	a, DX; \
   123  	MOVL	b, CX; \
   124  	RORL	$22, DX; \
   125  	ANDL	a, CX; \
   126  	XORL	CX, BX; \
   127  	XORL	DX, DI; \
   128  	ADDL	DI, BX
   129  
   130  // Calculate T1 and T2, then e = d + T1 and a = T1 + T2.
   131  // The values for e and a are stored in d and h, ready for rotation.
        // Expects Wt in AX on entry (consumed by SHA256T1).
        // After the two sub-macros AX = T1 and BX = T2; h is then overwritten with
        // T2 + T1 and T1 is added into d. b, c, e, f and g are left unchanged.
        // The index argument is not used by this macro itself; only the
        // SHA256ROUND0/SHA256ROUND1 wrappers use it, for the message schedule.
   132  #define SHA256ROUND(index, const, a, b, c, d, e, f, g, h) \
   133  	SHA256T1(const, e, f, g, h); \
   134  	SHA256T2(a, b, c); \
   135  	MOVL	BX, h; \
   136  	ADDL	AX, d; \
   137  	ADDL	AX, h
   138  
        // One round for 0 <= t <= 15: MSGSCHEDULE0 loads Wt from the input block
        // into AX and the schedule, then SHA256ROUND performs the round.
   139  #define SHA256ROUND0(index, const, a, b, c, d, e, f, g, h) \
   140  	MSGSCHEDULE0(index); \
   141  	SHA256ROUND(index, const, a, b, c, d, e, f, g, h)
   142  
        // One round for 16 <= t <= 63: MSGSCHEDULE1 derives Wt from earlier schedule
        // words into AX and the schedule, then SHA256ROUND performs the round.
   143  #define SHA256ROUND1(index, const, a, b, c, d, e, f, g, h) \
   144  	MSGSCHEDULE1(index); \
   145  	SHA256ROUND(index, const, a, b, c, d, e, f, g, h)
   146  
   147  TEXT ·block(SB),0,$264-32
        	// Runs the 64 SHA-256 rounds over every complete 64-byte block of the
        	// input and adds the result into the eight 32-bit state words at dig.
        	// NOTE(review): presumably declared in Go as
        	// func block(dig *digest, p []byte) - confirm against sha256block.go.
        	//
        	// Frame (264 bytes): 0..255(SP) is the 64-word message schedule,
        	// 256(SP) holds the end-of-input pointer.
        	// Registers: SI = current input block; BP = schedule base inside the
        	// rounds, dig while loading/storing state; R8-R15 = a-h;
        	// AX, BX, CX, DX, DI = scratch for the round macros.
   148  	MOVQ	p_base+8(FP), SI	// SI = start of input
   149  	MOVQ	p_len+16(FP), DX	// DX = input length in bytes
   150  	SHRQ	$6, DX			// round the length down to
   151  	SHLQ	$6, DX			// a multiple of 64
   152  
   153  	LEAQ	(SI)(DX*1), DI		// DI = end of the last complete block
   154  	MOVQ	DI, 256(SP)		// keep it in the frame: SHA256T2 overwrites DI
   155  	CMPQ	SI, DI
   156  	JEQ	end			// no complete block, nothing to do
   157  
   158  	MOVQ	dig+0(FP), BP		// BP = dig, to load the state words
   159  	MOVL	(0*4)(BP), R8		// a = H0
   160  	MOVL	(1*4)(BP), R9		// b = H1
   161  	MOVL	(2*4)(BP), R10		// c = H2
   162  	MOVL	(3*4)(BP), R11		// d = H3
   163  	MOVL	(4*4)(BP), R12		// e = H4
   164  	MOVL	(5*4)(BP), R13		// f = H5
   165  	MOVL	(6*4)(BP), R14		// g = H6
   166  	MOVL	(7*4)(BP), R15		// h = H7
   167  
   168  loop:
   169  	MOVQ	SP, BP			// message schedule
   170  
        	// Rounds 0-15: Wt is read from the input block. Rather than moving
        	// values between registers after each round, the a-h register
        	// arguments are rotated by one position from one round to the next.
   171  	SHA256ROUND0(0, 0x428a2f98, R8, R9, R10, R11, R12, R13, R14, R15)
   172  	SHA256ROUND0(1, 0x71374491, R15, R8, R9, R10, R11, R12, R13, R14)
   173  	SHA256ROUND0(2, 0xb5c0fbcf, R14, R15, R8, R9, R10, R11, R12, R13)
   174  	SHA256ROUND0(3, 0xe9b5dba5, R13, R14, R15, R8, R9, R10, R11, R12)
   175  	SHA256ROUND0(4, 0x3956c25b, R12, R13, R14, R15, R8, R9, R10, R11)
   176  	SHA256ROUND0(5, 0x59f111f1, R11, R12, R13, R14, R15, R8, R9, R10)
   177  	SHA256ROUND0(6, 0x923f82a4, R10, R11, R12, R13, R14, R15, R8, R9)
   178  	SHA256ROUND0(7, 0xab1c5ed5, R9, R10, R11, R12, R13, R14, R15, R8)
   179  	SHA256ROUND0(8, 0xd807aa98, R8, R9, R10, R11, R12, R13, R14, R15)
   180  	SHA256ROUND0(9, 0x12835b01, R15, R8, R9, R10, R11, R12, R13, R14)
   181  	SHA256ROUND0(10, 0x243185be, R14, R15, R8, R9, R10, R11, R12, R13)
   182  	SHA256ROUND0(11, 0x550c7dc3, R13, R14, R15, R8, R9, R10, R11, R12)
   183  	SHA256ROUND0(12, 0x72be5d74, R12, R13, R14, R15, R8, R9, R10, R11)
   184  	SHA256ROUND0(13, 0x80deb1fe, R11, R12, R13, R14, R15, R8, R9, R10)
   185  	SHA256ROUND0(14, 0x9bdc06a7, R10, R11, R12, R13, R14, R15, R8, R9)
   186  	SHA256ROUND0(15, 0xc19bf174, R9, R10, R11, R12, R13, R14, R15, R8)
   187  
        	// Rounds 16-63: Wt is derived from earlier schedule words.
   188  	SHA256ROUND1(16, 0xe49b69c1, R8, R9, R10, R11, R12, R13, R14, R15)
   189  	SHA256ROUND1(17, 0xefbe4786, R15, R8, R9, R10, R11, R12, R13, R14)
   190  	SHA256ROUND1(18, 0x0fc19dc6, R14, R15, R8, R9, R10, R11, R12, R13)
   191  	SHA256ROUND1(19, 0x240ca1cc, R13, R14, R15, R8, R9, R10, R11, R12)
   192  	SHA256ROUND1(20, 0x2de92c6f, R12, R13, R14, R15, R8, R9, R10, R11)
   193  	SHA256ROUND1(21, 0x4a7484aa, R11, R12, R13, R14, R15, R8, R9, R10)
   194  	SHA256ROUND1(22, 0x5cb0a9dc, R10, R11, R12, R13, R14, R15, R8, R9)
   195  	SHA256ROUND1(23, 0x76f988da, R9, R10, R11, R12, R13, R14, R15, R8)
   196  	SHA256ROUND1(24, 0x983e5152, R8, R9, R10, R11, R12, R13, R14, R15)
   197  	SHA256ROUND1(25, 0xa831c66d, R15, R8, R9, R10, R11, R12, R13, R14)
   198  	SHA256ROUND1(26, 0xb00327c8, R14, R15, R8, R9, R10, R11, R12, R13)
   199  	SHA256ROUND1(27, 0xbf597fc7, R13, R14, R15, R8, R9, R10, R11, R12)
   200  	SHA256ROUND1(28, 0xc6e00bf3, R12, R13, R14, R15, R8, R9, R10, R11)
   201  	SHA256ROUND1(29, 0xd5a79147, R11, R12, R13, R14, R15, R8, R9, R10)
   202  	SHA256ROUND1(30, 0x06ca6351, R10, R11, R12, R13, R14, R15, R8, R9)
   203  	SHA256ROUND1(31, 0x14292967, R9, R10, R11, R12, R13, R14, R15, R8)
   204  	SHA256ROUND1(32, 0x27b70a85, R8, R9, R10, R11, R12, R13, R14, R15)
   205  	SHA256ROUND1(33, 0x2e1b2138, R15, R8, R9, R10, R11, R12, R13, R14)
   206  	SHA256ROUND1(34, 0x4d2c6dfc, R14, R15, R8, R9, R10, R11, R12, R13)
   207  	SHA256ROUND1(35, 0x53380d13, R13, R14, R15, R8, R9, R10, R11, R12)
   208  	SHA256ROUND1(36, 0x650a7354, R12, R13, R14, R15, R8, R9, R10, R11)
   209  	SHA256ROUND1(37, 0x766a0abb, R11, R12, R13, R14, R15, R8, R9, R10)
   210  	SHA256ROUND1(38, 0x81c2c92e, R10, R11, R12, R13, R14, R15, R8, R9)
   211  	SHA256ROUND1(39, 0x92722c85, R9, R10, R11, R12, R13, R14, R15, R8)
   212  	SHA256ROUND1(40, 0xa2bfe8a1, R8, R9, R10, R11, R12, R13, R14, R15)
   213  	SHA256ROUND1(41, 0xa81a664b, R15, R8, R9, R10, R11, R12, R13, R14)
   214  	SHA256ROUND1(42, 0xc24b8b70, R14, R15, R8, R9, R10, R11, R12, R13)
   215  	SHA256ROUND1(43, 0xc76c51a3, R13, R14, R15, R8, R9, R10, R11, R12)
   216  	SHA256ROUND1(44, 0xd192e819, R12, R13, R14, R15, R8, R9, R10, R11)
   217  	SHA256ROUND1(45, 0xd6990624, R11, R12, R13, R14, R15, R8, R9, R10)
   218  	SHA256ROUND1(46, 0xf40e3585, R10, R11, R12, R13, R14, R15, R8, R9)
   219  	SHA256ROUND1(47, 0x106aa070, R9, R10, R11, R12, R13, R14, R15, R8)
   220  	SHA256ROUND1(48, 0x19a4c116, R8, R9, R10, R11, R12, R13, R14, R15)
   221  	SHA256ROUND1(49, 0x1e376c08, R15, R8, R9, R10, R11, R12, R13, R14)
   222  	SHA256ROUND1(50, 0x2748774c, R14, R15, R8, R9, R10, R11, R12, R13)
   223  	SHA256ROUND1(51, 0x34b0bcb5, R13, R14, R15, R8, R9, R10, R11, R12)
   224  	SHA256ROUND1(52, 0x391c0cb3, R12, R13, R14, R15, R8, R9, R10, R11)
   225  	SHA256ROUND1(53, 0x4ed8aa4a, R11, R12, R13, R14, R15, R8, R9, R10)
   226  	SHA256ROUND1(54, 0x5b9cca4f, R10, R11, R12, R13, R14, R15, R8, R9)
   227  	SHA256ROUND1(55, 0x682e6ff3, R9, R10, R11, R12, R13, R14, R15, R8)
   228  	SHA256ROUND1(56, 0x748f82ee, R8, R9, R10, R11, R12, R13, R14, R15)
   229  	SHA256ROUND1(57, 0x78a5636f, R15, R8, R9, R10, R11, R12, R13, R14)
   230  	SHA256ROUND1(58, 0x84c87814, R14, R15, R8, R9, R10, R11, R12, R13)
   231  	SHA256ROUND1(59, 0x8cc70208, R13, R14, R15, R8, R9, R10, R11, R12)
   232  	SHA256ROUND1(60, 0x90befffa, R12, R13, R14, R15, R8, R9, R10, R11)
   233  	SHA256ROUND1(61, 0xa4506ceb, R11, R12, R13, R14, R15, R8, R9, R10)
   234  	SHA256ROUND1(62, 0xbef9a3f7, R10, R11, R12, R13, R14, R15, R8, R9)
   235  	SHA256ROUND1(63, 0xc67178f2, R9, R10, R11, R12, R13, R14, R15, R8)
   236  
        	// After 64 rounds the rotation has come full circle, so R8-R15 hold a-h
        	// again. BP was the schedule base during the rounds; point it back at dig.
   237  	MOVQ	dig+0(FP), BP
   238  	ADDL	(0*4)(BP), R8	// H0 = a + H0
   239  	MOVL	R8, (0*4)(BP)
   240  	ADDL	(1*4)(BP), R9	// H1 = b + H1
   241  	MOVL	R9, (1*4)(BP)
   242  	ADDL	(2*4)(BP), R10	// H2 = c + H2
   243  	MOVL	R10, (2*4)(BP)
   244  	ADDL	(3*4)(BP), R11	// H3 = d + H3
   245  	MOVL	R11, (3*4)(BP)
   246  	ADDL	(4*4)(BP), R12	// H4 = e + H4
   247  	MOVL	R12, (4*4)(BP)
   248  	ADDL	(5*4)(BP), R13	// H5 = f + H5
   249  	MOVL	R13, (5*4)(BP)
   250  	ADDL	(6*4)(BP), R14	// H6 = g + H6
   251  	MOVL	R14, (6*4)(BP)
   252  	ADDL	(7*4)(BP), R15	// H7 = h + H7
   253  	MOVL	R15, (7*4)(BP)
   254  
   255  	ADDQ	$64, SI		// advance to the next 64-byte block
   256  	CMPQ	SI, 256(SP)	// compare against the saved end pointer
   257  	JB	loop		// unsigned: loop while SI is below the end
   258  
   259  end:
   260  	RET