// Source: github.com/ccccaoqing/test@v0.0.0-20220510085219-3985d23445c0/src/crypto/sha1/sha1block_arm.s

// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
// ARM version of sha1block.go

#include "textflag.h"

// SHA1 block routine. See sha1block.go for Go equivalent.
//
// There are 80 rounds of 4 types:
//   - rounds 0-15 are type 1 and load data (ROUND1 macro).
//   - rounds 16-19 are type 1 and do not load data (ROUND1x macro).
//   - rounds 20-39 are type 2 and do not load data (ROUND2 macro).
//   - rounds 40-59 are type 3 and do not load data (ROUND3 macro).
//   - rounds 60-79 are type 4 and do not load data (ROUND4 macro).
//
// Each round loads or shuffles the data, then computes a per-round
// function of b, c, d, and then mixes the result into and rotates the
// five registers a, b, c, d, e holding the intermediate results.
//
// The register rotation is implemented by rotating the arguments to
// the round macros instead of by explicit move instructions.
// Register definitions
//
// Each symbol below is an assembler variable holding an ARM register
// number; the code names the register as R(symbol).
data = 0	// Pointer to incoming data (advanced by 4 in every LOAD)
const = 1	// Current constant for SHA round
a = 2		// SHA1 accumulator
b = 3		// SHA1 accumulator
c = 4		// SHA1 accumulator
d = 5		// SHA1 accumulator
e = 6		// SHA1 accumulator
t0 = 7		// Temporary
t1 = 8		// Temporary
// r9, r10 are forbidden
// r11 is OK provided you check the assembler that no synthetic instructions use it
t2 = 11		// Temporary
ctr = 12	// loop counter
w = 14		// point to w buffer (advanced by 4 in every LOAD and SHUFFLE)

// func block(dig *digest, p []byte)
// 0(FP) is *digest
// 4(FP) is p.array (struct Slice)
// 8(FP) is p.len
//12(FP) is p.cap
//
// Stack frame
//
// NOTE(review): within this file p_data is only used to derive w_buf, and
// saved is never referenced by name; the accumulators are saved and
// restored with MOVM.IB relative to R13 instead.
p_end = -4		// -4(SP) pointer to the end of data
p_data = p_end - 4	// -8(SP) current data pointer
w_buf = p_data - 4*80	// -328(SP) 80 words temporary buffer w uint32[80]
saved = w_buf - 4*5	// -348(SP) saved sha1 registers a,b,c,d,e - these must be last
// Total size +4 for saved LR is 352

	// LOAD reads the next four input bytes as one big-endian word:
	//	w[i] = p[j]<<24 | p[j+1]<<16 | p[j+2]<<8 | p[j+3]
	//	e += w[i]
	// The word is stored through the w pointer. Both data and w are
	// post-incremented by 4 (the .P suffix). Clobbers t0, t1 and t2.
#define LOAD(e) \
	MOVBU	2(R(data)), R(t0) ; \
	MOVBU	3(R(data)), R(t1) ; \
	MOVBU	1(R(data)), R(t2) ; \
	ORR	R(t0)<<8, R(t1), R(t0) ; \
	MOVBU.P	4(R(data)), R(t1) ; \
	ORR	R(t2)<<16, R(t0), R(t0) ; \
	ORR	R(t1)<<24, R(t0), R(t0) ; \
	MOVW.P	R(t0), 4(R(w)) ; \
	ADD	R(t0), R(e), R(e)

	// SHUFFLE extends the message schedule by one word. The Go version
	// keeps a 16-word circular buffer:
	//	tmp := w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf] ^ w[(i)&0xf]
	//	w[i&0xf] = tmp<<1 | tmp>>(32-1)
	//	e += w[i&0xf]
	// Here w is a linear 80-word buffer, so the same four words are read at
	// word offsets -3, -8, -14 and -16 from the current w pointer:
	//	tmp := w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]
	//	w[i] = tmp<<1 | tmp>>(32-1)
	//	e += w[i]
	// The rotate left by 1 is written as a rotate right by 32-1. The result
	// is stored through w, which is then post-incremented by 4.
	// Clobbers t0, t1 and t2.
#define SHUFFLE(e) \
	MOVW	(-16*4)(R(w)), R(t0) ; \
	MOVW	(-14*4)(R(w)), R(t1) ; \
	MOVW	(-8*4)(R(w)), R(t2) ; \
	EOR	R(t0), R(t1), R(t0) ; \
	MOVW	(-3*4)(R(w)), R(t1) ; \
	EOR	R(t2), R(t0), R(t0) ; \
	EOR	R(t0), R(t1), R(t0) ; \
	MOVW	R(t0)@>(32-1), R(t0) ; \
	MOVW.P	R(t0), 4(R(w)) ; \
	ADD	R(t0), R(e), R(e)

	// t1 = (b & c) | ((~b) & d)
	// Uses t0 as scratch; the result is left in t1 for MIX.
#define FUNC1(a, b, c, d, e) \
	MVN	R(b), R(t1) ; \
	AND	R(b), R(c), R(t0) ; \
	AND	R(d), R(t1), R(t1) ; \
	ORR	R(t0), R(t1), R(t1)

	// t1 = b ^ c ^ d
	// The result is left in t1 for MIX; t0 is not touched.
#define FUNC2(a, b, c, d, e) \
	EOR	R(b), R(c), R(t1) ; \
	EOR	R(d), R(t1), R(t1)

	// t1 = (b & c) | (b & d) | (c & d) =
	// t1 = (b & c) | ((b | c) & d)
	// Uses t0 as scratch; the result is left in t1 for MIX.
#define FUNC3(a, b, c, d, e) \
	ORR	R(b), R(c), R(t0) ; \
	AND	R(b), R(c), R(t1) ; \
	AND	R(d), R(t0), R(t0) ; \
	ORR	R(t0), R(t1), R(t1)

// Type 4 rounds use the same function as type 2 rounds.
#define FUNC4 FUNC2

	// a5 := a<<5 | a>>(32-5)
	// b = b<<30 | b>>(32-30)
	// e = a5 + t1 + e + const
	// Both left rotations are written as right rotations by 32-n.
	// b is rotated in place; a is only read, with the rotation applied
	// to the ADD operand.
#define MIX(a, b, c, d, e) \
	ADD	R(t1), R(e), R(e) ; \
	MOVW	R(b)@>(32-30), R(b) ; \
	ADD	R(a)@>(32-5), R(e), R(e) ; \
	ADD	R(const), R(e), R(e)

// Type 1 round that also loads the next input word (rounds 0-15).
#define ROUND1(a, b, c, d, e) \
	LOAD(e) ; \
	FUNC1(a, b, c, d, e) ; \
	MIX(a, b, c, d, e)

// Type 1 round that derives its word from earlier ones (rounds 16-19).
#define ROUND1x(a, b, c, d, e) \
	SHUFFLE(e) ; \
	FUNC1(a, b, c, d, e) ; \
	MIX(a, b, c, d, e)

// Type 2 round (rounds 20-39).
#define ROUND2(a, b, c, d, e) \
	SHUFFLE(e) ; \
	FUNC2(a, b, c, d, e) ; \
	MIX(a, b, c, d, e)

// Type 3 round (rounds 40-59).
#define ROUND3(a, b, c, d, e) \
	SHUFFLE(e) ; \
	FUNC3(a, b, c, d, e) ; \
	MIX(a, b, c, d, e)

// Type 4 round (rounds 60-79).
#define ROUND4(a, b, c, d, e) \
	SHUFFLE(e) ; \
	FUNC4(a, b, c, d, e) ; \
	MIX(a, b, c, d, e)


// func block(dig *digest, p []byte)
//
// block loads five accumulator words from the address in dig, runs the
// 80 rounds once per pass of the outer loop, and stores the five words
// back to dig when the data pointer reaches the end of p. Each pass
// consumes 64 bytes of p (16 LOAD rounds of 4 bytes each).
//
// NOTE(review): the end-of-data test is at the bottom of the outer loop, so
// one pass always runs; a zero-length p would read 64 bytes regardless.
// Presumably callers only pass a non-empty multiple of 64 bytes - confirm.
TEXT	·block(SB), 0, $352-16
	MOVW	p+4(FP), R(data)	// pointer to the data
	MOVW	p_len+8(FP), R(t0)	// number of bytes
	ADD	R(data), R(t0)
	MOVW	R(t0), p_end(SP)	// pointer to end of data

	// Load up initial SHA1 accumulator
	MOVW	dig+0(FP), R(t0)
	MOVM.IA (R(t0)), [R(a),R(b),R(c),R(d),R(e)]

loop:
	// Save registers at SP+4 onwards
	MOVM.IB [R(a),R(b),R(c),R(d),R(e)], (R13)

	// Rounds 0-14: 3 iterations of 5 loading rounds. The argument
	// rotation returns to (a, b, c, d, e) after every 5 rounds, which is
	// why each loop body is exactly 5 rounds long.
	MOVW	$w_buf(SP), R(w)
	MOVW	$0x5A827999, R(const)
	MOVW	$3, R(ctr)
loop1:	ROUND1(a, b, c, d, e)
	ROUND1(e, a, b, c, d)
	ROUND1(d, e, a, b, c)
	ROUND1(c, d, e, a, b)
	ROUND1(b, c, d, e, a)
	SUB.S	$1, R(ctr)
	BNE	loop1

	// Round 15 (last loading round), then rounds 16-19 without loading.
	ROUND1(a, b, c, d, e)
	ROUND1x(e, a, b, c, d)
	ROUND1x(d, e, a, b, c)
	ROUND1x(c, d, e, a, b)
	ROUND1x(b, c, d, e, a)

	// Rounds 20-39: 4 iterations of 5 type 2 rounds.
	MOVW	$0x6ED9EBA1, R(const)
	MOVW	$4, R(ctr)
loop2:	ROUND2(a, b, c, d, e)
	ROUND2(e, a, b, c, d)
	ROUND2(d, e, a, b, c)
	ROUND2(c, d, e, a, b)
	ROUND2(b, c, d, e, a)
	SUB.S	$1, R(ctr)
	BNE	loop2

	// Rounds 40-59: 4 iterations of 5 type 3 rounds.
	MOVW	$0x8F1BBCDC, R(const)
	MOVW	$4, R(ctr)
loop3:	ROUND3(a, b, c, d, e)
	ROUND3(e, a, b, c, d)
	ROUND3(d, e, a, b, c)
	ROUND3(c, d, e, a, b)
	ROUND3(b, c, d, e, a)
	SUB.S	$1, R(ctr)
	BNE	loop3

	// Rounds 60-79: 4 iterations of 5 type 4 rounds.
	MOVW	$0xCA62C1D6, R(const)
	MOVW	$4, R(ctr)
loop4:	ROUND4(a, b, c, d, e)
	ROUND4(e, a, b, c, d)
	ROUND4(d, e, a, b, c)
	ROUND4(c, d, e, a, b)
	ROUND4(b, c, d, e, a)
	SUB.S	$1, R(ctr)
	BNE	loop4

	// Accumulate - restoring registers from SP+4
	// The saved values are loaded into t0, t1, t2, ctr and w, which are
	// free here: ctr and w are reinitialised at the top of the loop.
	MOVM.IB (R13), [R(t0),R(t1),R(t2),R(ctr),R(w)]
	ADD	R(t0), R(a)
	ADD	R(t1), R(b)
	ADD	R(t2), R(c)
	ADD	R(ctr), R(d)
	ADD	R(w), R(e)

	// Go round again while the data pointer is below the end pointer.
	MOVW	p_end(SP), R(t0)
	CMP	R(t0), R(data)
	BLO	loop

	// Save final SHA1 accumulator
	MOVW	dig+0(FP), R(t0)
	MOVM.IA [R(a),R(b),R(c),R(d),R(e)], (R(t0))

	RET