// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build !appengine

//#include "../../../cmd/ld/textflag.h"
// just use the #define for now since this isn't in the main repo yet.
// NOTE(review): NOSPLIT is not referenced anywhere below (the TEXT
// directive passes flag 0); it is kept only to preserve the original file.
#define NOSPLIT 4

// SHA256 block routine. See sha256block.go for Go equivalent.
//
// The algorithm is detailed in FIPS 180-4:
//
//  http://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
//
// Wt = Mt; for 0 <= t <= 15
// Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 63
//
// a = H0
// b = H1
// c = H2
// d = H3
// e = H4
// f = H5
// g = H6
// h = H7
//
// for t = 0 to 63 {
//    T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
//    T2 = BIGSIGMA0(a) + Maj(a,b,c)
//    h = g
//    g = f
//    f = e
//    e = d + T1
//    d = c
//    c = b
//    b = a
//    a = T1 + T2
// }
//
// H0 = a + H0
// H1 = b + H1
// H2 = c + H2
// H3 = d + H3
// H4 = e + H4
// H5 = f + H5
// H6 = g + H6
// H7 = h + H7

// Register and memory conventions shared by the macros below:
//   SI      - pointer to the 64-byte input block currently being hashed
//   BP      - base of the 64-entry, 4-bytes-per-entry message schedule W
//   AX      - carries Wt out of MSGSCHEDULE0/1 and into SHA256T1,
//             then carries T1 out of SHA256T1
//   BX      - carries T2 out of SHA256T2
//   CX, DX, DI - scratch

// Wt = Mt; for 0 <= t <= 15
// Loads 32-bit word `index` of the input block at SI, byte-swaps it,
// stores it as W[index] at BP and leaves it in AX.
#define MSGSCHEDULE0(index) \
	MOVL	(index*4)(SI), AX; \
	BSWAPL	AX; \
	MOVL	AX, (index*4)(BP)

// Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 63
//   SIGMA0(x) = ROTR(7,x) XOR ROTR(18,x) XOR SHR(3,x)
//   SIGMA1(x) = ROTR(17,x) XOR ROTR(19,x) XOR SHR(10,x)
// Reads W[index-2], W[index-7], W[index-15] and W[index-16] from BP,
// stores the result as W[index] and leaves it in AX.
// Uses AX, BX, CX and DX registers.
#define MSGSCHEDULE1(index) \
	MOVL	((index-2)*4)(BP), AX; \
	MOVL	AX, CX; \
	RORL	$17, AX; \
	MOVL	CX, DX; \
	RORL	$19, CX; \
	SHRL	$10, DX; \
	MOVL	((index-15)*4)(BP), BX; \
	XORL	CX, AX; \
	MOVL	BX, CX; \
	XORL	DX, AX; \
	RORL	$7, BX; \
	MOVL	CX, DX; \
	SHRL	$3, DX; \
	RORL	$18, CX; \
	ADDL	((index-7)*4)(BP), AX; \
	XORL	CX, BX; \
	XORL	DX, BX; \
	ADDL	((index-16)*4)(BP), BX; \
	ADDL	BX, AX; \
	MOVL	AX, ((index)*4)(BP)

// Calculate T1 in AX - uses AX, CX and DX registers.
// h is also used as an accumulator. Wt is passed in AX.
//   T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + Kt + Wt
//     BIGSIGMA1(x) = ROTR(6,x) XOR ROTR(11,x) XOR ROTR(25,x)
//     Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
// On exit h holds h + Wt + Kt + BIGSIGMA1(e), i.e. its original value
// is gone; e, f and g are only read.
#define SHA256T1(const, e, f, g, h) \
	ADDL	AX, h; \
	MOVL	e, AX; \
	ADDL	$const, h; \
	MOVL	e, CX; \
	RORL	$6, AX; \
	MOVL	e, DX; \
	RORL	$11, CX; \
	XORL	CX, AX; \
	MOVL	e, CX; \
	RORL	$25, DX; \
	ANDL	f, CX; \
	XORL	AX, DX; \
	MOVL	e, AX; \
	NOTL	AX; \
	ADDL	DX, h; \
	ANDL	g, AX; \
	XORL	CX, AX; \
	ADDL	h, AX

// Calculate T2 in BX - uses BX, CX, DX and DI registers.
//   T2 = BIGSIGMA0(a) + Maj(a, b, c)
//     BIGSIGMA0(x) = ROTR(2,x) XOR ROTR(13,x) XOR ROTR(22,x)
//     Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
// a, b and c are only read. AX is not touched, so T1 survives this macro.
#define SHA256T2(a, b, c) \
	MOVL	a, DI; \
	MOVL	c, BX; \
	RORL	$2, DI; \
	MOVL	a, DX; \
	ANDL	b, BX; \
	RORL	$13, DX; \
	MOVL	a, CX; \
	ANDL	c, CX; \
	XORL	DX, DI; \
	XORL	CX, BX; \
	MOVL	a, DX; \
	MOVL	b, CX; \
	RORL	$22, DX; \
	ANDL	a, CX; \
	XORL	CX, BX; \
	XORL	DX, DI; \
	ADDL	DI, BX

// Calculate T1 and T2, then e = d + T1 and a = T1 + T2.
// The values for e and a are stored in d and h, ready for rotation.
// Expects Wt in AX. The remaining working variables are not moved between
// registers; instead each successive invocation passes the registers
// rotated by one position, so the register written as "h" here is named
// as "a" by the next round and the one written as "d" is named as "e".
// The index argument is not used by this macro itself.
#define SHA256ROUND(index, const, a, b, c, d, e, f, g, h) \
	SHA256T1(const, e, f, g, h); \
	SHA256T2(a, b, c); \
	MOVL	BX, h; \
	ADDL	AX, d; \
	ADDL	AX, h

// Round for 0 <= t <= 15: take Wt straight from the input block.
#define SHA256ROUND0(index, const, a, b, c, d, e, f, g, h) \
	MSGSCHEDULE0(index); \
	SHA256ROUND(index, const, a, b, c, d, e, f, g, h)

// Round for 16 <= t <= 63: derive Wt from earlier schedule entries.
#define SHA256ROUND1(index, const, a, b, c, d, e, f, g, h) \
	MSGSCHEDULE1(index); \
	SHA256ROUND(index, const, a, b, c, d, e, f, g, h)

// block hashes every complete 64-byte chunk of p into the state at dig.
//
// Arguments (32 bytes):
//   dig+0(FP)     pointer to the hash state; the eight 32-bit words at
//                 offsets 0..28 are read as H0..H7 and written back after
//                 each chunk
//   p_base+8(FP)  start of the input
//   p_len+16(FP)  input length in bytes; it is rounded down to a multiple
//                 of 64 and any trailing bytes are ignored
// NOTE(review): the Go declaration is not in this file; the argument names
// and size suggest func block(dig *digest, p []byte) - confirm against
// sha256block.go and its sibling declaration file.
//
// If p holds fewer than 64 bytes the routine returns without reading or
// writing dig.
//
// Stack frame (264 bytes):
//   0(SP)..255(SP)  message schedule W[0..63], 4 bytes per entry
//   256(SP)         pointer one past the last complete chunk of input
//
// Registers: R8..R15 hold the working variables a..h; SI walks the input;
// BP points at dig while loading/storing the state and at the message
// schedule during the rounds; AX, BX, CX, DX and DI are scratch.
TEXT ·block(SB),0,$264-32
	MOVQ	p_base+8(FP), SI
	MOVQ	p_len+16(FP), DX
	SHRQ	$6, DX			// round the length down ...
	SHLQ	$6, DX			// ... to a multiple of 64

	LEAQ	(SI)(DX*1), DI		// DI = end of the last complete chunk
	MOVQ	DI, 256(SP)
	CMPQ	SI, DI
	JEQ	end			// no complete chunk to process

	MOVQ	dig+0(FP), BP
	MOVL	(0*4)(BP), R8		// a = H0
	MOVL	(1*4)(BP), R9		// b = H1
	MOVL	(2*4)(BP), R10		// c = H2
	MOVL	(3*4)(BP), R11		// d = H3
	MOVL	(4*4)(BP), R12		// e = H4
	MOVL	(5*4)(BP), R13		// f = H5
	MOVL	(6*4)(BP), R14		// g = H6
	MOVL	(7*4)(BP), R15		// h = H7

loop:
	MOVQ	SP, BP			// message schedule

	// The register list rotates right by one position per round and
	// returns to its starting order every 8 rounds, so after 64 rounds
	// R8..R15 again hold a..h.
	SHA256ROUND0(0, 0x428a2f98, R8, R9, R10, R11, R12, R13, R14, R15)
	SHA256ROUND0(1, 0x71374491, R15, R8, R9, R10, R11, R12, R13, R14)
	SHA256ROUND0(2, 0xb5c0fbcf, R14, R15, R8, R9, R10, R11, R12, R13)
	SHA256ROUND0(3, 0xe9b5dba5, R13, R14, R15, R8, R9, R10, R11, R12)
	SHA256ROUND0(4, 0x3956c25b, R12, R13, R14, R15, R8, R9, R10, R11)
	SHA256ROUND0(5, 0x59f111f1, R11, R12, R13, R14, R15, R8, R9, R10)
	SHA256ROUND0(6, 0x923f82a4, R10, R11, R12, R13, R14, R15, R8, R9)
	SHA256ROUND0(7, 0xab1c5ed5, R9, R10, R11, R12, R13, R14, R15, R8)
	SHA256ROUND0(8, 0xd807aa98, R8, R9, R10, R11, R12, R13, R14, R15)
	SHA256ROUND0(9, 0x12835b01, R15, R8, R9, R10, R11, R12, R13, R14)
	SHA256ROUND0(10, 0x243185be, R14, R15, R8, R9, R10, R11, R12, R13)
	SHA256ROUND0(11, 0x550c7dc3, R13, R14, R15, R8, R9, R10, R11, R12)
	SHA256ROUND0(12, 0x72be5d74, R12, R13, R14, R15, R8, R9, R10, R11)
	SHA256ROUND0(13, 0x80deb1fe, R11, R12, R13, R14, R15, R8, R9, R10)
	SHA256ROUND0(14, 0x9bdc06a7, R10, R11, R12, R13, R14, R15, R8, R9)
	SHA256ROUND0(15, 0xc19bf174, R9, R10, R11, R12, R13, R14, R15, R8)

	SHA256ROUND1(16, 0xe49b69c1, R8, R9, R10, R11, R12, R13, R14, R15)
	SHA256ROUND1(17, 0xefbe4786, R15, R8, R9, R10, R11, R12, R13, R14)
	SHA256ROUND1(18, 0x0fc19dc6, R14, R15, R8, R9, R10, R11, R12, R13)
	SHA256ROUND1(19, 0x240ca1cc, R13, R14, R15, R8, R9, R10, R11, R12)
	SHA256ROUND1(20, 0x2de92c6f, R12, R13, R14, R15, R8, R9, R10, R11)
	SHA256ROUND1(21, 0x4a7484aa, R11, R12, R13, R14, R15, R8, R9, R10)
	SHA256ROUND1(22, 0x5cb0a9dc, R10, R11, R12, R13, R14, R15, R8, R9)
	SHA256ROUND1(23, 0x76f988da, R9, R10, R11, R12, R13, R14, R15, R8)
	SHA256ROUND1(24, 0x983e5152, R8, R9, R10, R11, R12, R13, R14, R15)
	SHA256ROUND1(25, 0xa831c66d, R15, R8, R9, R10, R11, R12, R13, R14)
	SHA256ROUND1(26, 0xb00327c8, R14, R15, R8, R9, R10, R11, R12, R13)
	SHA256ROUND1(27, 0xbf597fc7, R13, R14, R15, R8, R9, R10, R11, R12)
	SHA256ROUND1(28, 0xc6e00bf3, R12, R13, R14, R15, R8, R9, R10, R11)
	SHA256ROUND1(29, 0xd5a79147, R11, R12, R13, R14, R15, R8, R9, R10)
	SHA256ROUND1(30, 0x06ca6351, R10, R11, R12, R13, R14, R15, R8, R9)
	SHA256ROUND1(31, 0x14292967, R9, R10, R11, R12, R13, R14, R15, R8)
	SHA256ROUND1(32, 0x27b70a85, R8, R9, R10, R11, R12, R13, R14, R15)
	SHA256ROUND1(33, 0x2e1b2138, R15, R8, R9, R10, R11, R12, R13, R14)
	SHA256ROUND1(34, 0x4d2c6dfc, R14, R15, R8, R9, R10, R11, R12, R13)
	SHA256ROUND1(35, 0x53380d13, R13, R14, R15, R8, R9, R10, R11, R12)
	SHA256ROUND1(36, 0x650a7354, R12, R13, R14, R15, R8, R9, R10, R11)
	SHA256ROUND1(37, 0x766a0abb, R11, R12, R13, R14, R15, R8, R9, R10)
	SHA256ROUND1(38, 0x81c2c92e, R10, R11, R12, R13, R14, R15, R8, R9)
	SHA256ROUND1(39, 0x92722c85, R9, R10, R11, R12, R13, R14, R15, R8)
	SHA256ROUND1(40, 0xa2bfe8a1, R8, R9, R10, R11, R12, R13, R14, R15)
	SHA256ROUND1(41, 0xa81a664b, R15, R8, R9, R10, R11, R12, R13, R14)
	SHA256ROUND1(42, 0xc24b8b70, R14, R15, R8, R9, R10, R11, R12, R13)
	SHA256ROUND1(43, 0xc76c51a3, R13, R14, R15, R8, R9, R10, R11, R12)
	SHA256ROUND1(44, 0xd192e819, R12, R13, R14, R15, R8, R9, R10, R11)
	SHA256ROUND1(45, 0xd6990624, R11, R12, R13, R14, R15, R8, R9, R10)
	SHA256ROUND1(46, 0xf40e3585, R10, R11, R12, R13, R14, R15, R8, R9)
	SHA256ROUND1(47, 0x106aa070, R9, R10, R11, R12, R13, R14, R15, R8)
	SHA256ROUND1(48, 0x19a4c116, R8, R9, R10, R11, R12, R13, R14, R15)
	SHA256ROUND1(49, 0x1e376c08, R15, R8, R9, R10, R11, R12, R13, R14)
	SHA256ROUND1(50, 0x2748774c, R14, R15, R8, R9, R10, R11, R12, R13)
	SHA256ROUND1(51, 0x34b0bcb5, R13, R14, R15, R8, R9, R10, R11, R12)
	SHA256ROUND1(52, 0x391c0cb3, R12, R13, R14, R15, R8, R9, R10, R11)
	SHA256ROUND1(53, 0x4ed8aa4a, R11, R12, R13, R14, R15, R8, R9, R10)
	SHA256ROUND1(54, 0x5b9cca4f, R10, R11, R12, R13, R14, R15, R8, R9)
	SHA256ROUND1(55, 0x682e6ff3, R9, R10, R11, R12, R13, R14, R15, R8)
	SHA256ROUND1(56, 0x748f82ee, R8, R9, R10, R11, R12, R13, R14, R15)
	SHA256ROUND1(57, 0x78a5636f, R15, R8, R9, R10, R11, R12, R13, R14)
	SHA256ROUND1(58, 0x84c87814, R14, R15, R8, R9, R10, R11, R12, R13)
	SHA256ROUND1(59, 0x8cc70208, R13, R14, R15, R8, R9, R10, R11, R12)
	SHA256ROUND1(60, 0x90befffa, R12, R13, R14, R15, R8, R9, R10, R11)
	SHA256ROUND1(61, 0xa4506ceb, R11, R12, R13, R14, R15, R8, R9, R10)
	SHA256ROUND1(62, 0xbef9a3f7, R10, R11, R12, R13, R14, R15, R8, R9)
	SHA256ROUND1(63, 0xc67178f2, R9, R10, R11, R12, R13, R14, R15, R8)

	// Fold the working variables back into the state. The sums stay in
	// R8..R15, so they are also the a..h starting values for the next
	// chunk.
	MOVQ	dig+0(FP), BP
	ADDL	(0*4)(BP), R8	// H0 = a + H0
	MOVL	R8, (0*4)(BP)
	ADDL	(1*4)(BP), R9	// H1 = b + H1
	MOVL	R9, (1*4)(BP)
	ADDL	(2*4)(BP), R10	// H2 = c + H2
	MOVL	R10, (2*4)(BP)
	ADDL	(3*4)(BP), R11	// H3 = d + H3
	MOVL	R11, (3*4)(BP)
	ADDL	(4*4)(BP), R12	// H4 = e + H4
	MOVL	R12, (4*4)(BP)
	ADDL	(5*4)(BP), R13	// H5 = f + H5
	MOVL	R13, (5*4)(BP)
	ADDL	(6*4)(BP), R14	// H6 = g + H6
	MOVL	R14, (6*4)(BP)
	ADDL	(7*4)(BP), R15	// H7 = h + H7
	MOVL	R15, (7*4)(BP)

	ADDQ	$64, SI			// advance to the next 64-byte chunk
	CMPQ	SI, 256(SP)
	JB	loop

end:
	RET