// Source: github.com/d4l3k/go@v0.0.0-20151015000803-65fc379daeda/src/crypto/sha1/sha1block_arm.s

// Copyright 2014 The Go Authors.  All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
// ARM version of sha1block.go

#include "textflag.h"

// SHA1 block routine. See sha1block.go for Go equivalent.
//
// There are 80 rounds of 4 types:
//   - rounds 0-15 are type 1 and load data (ROUND1 macro).
//   - rounds 16-19 are type 1 and do not load data (ROUND1x macro).
//   - rounds 20-39 are type 2 and do not load data (ROUND2 macro).
//   - rounds 40-59 are type 3 and do not load data (ROUND3 macro).
//   - rounds 60-79 are type 4 and do not load data (ROUND4 macro).
//
// Each round loads or shuffles the data, then computes a per-round
// function of b, c, d, and then mixes the result into the five
// registers a, b, c, d, e holding the intermediate results and
// rotates those registers.
//
// The register rotation is implemented by rotating the arguments to
// the round macros instead of by explicit move instructions.

// Register definitions
//
// Ra..Re hold the five SHA1 working variables for the whole routine.
// Rt0..Rt2 are scratch registers used inside the round macros.
// Rctr and Rw are only needed while the 80 rounds run; the accumulate step
// at the end of each block reuses them as temporaries.
#define Rdata	R0	// Pointer to incoming data
#define Rconst	R1	// Current constant for SHA round
#define Ra	R2		// SHA1 accumulator
#define Rb	R3		// SHA1 accumulator
#define Rc	R4		// SHA1 accumulator
#define Rd	R5		// SHA1 accumulator
#define Re	R6		// SHA1 accumulator
#define Rt0	R7		// Temporary
#define Rt1	R8		// Temporary
// r9, r10 are forbidden
// r11 is OK provided you check the assembler that no synthetic instructions use it
#define Rt2	R11		// Temporary
#define Rctr	R12	// Loop counter
#define Rw	R14		// Pointer to the next slot of the w buffer

// func block(dig *digest, p []byte)
//  0(FP) is *digest
//  4(FP) is p.array (struct Slice)
//  8(FP) is p.len
// 12(FP) is p.cap
//
// Stack frame
//
// NOTE(review): p_data and saved are defined but no instruction in this file
// references them. The accumulators are actually saved with MOVM.IB relative
// to R13 (SP+4 onwards), not through the saved name.
#define p_end	end-4(SP)		// pointer to the end of data
#define p_data	data-8(SP)	// current data pointer (unused?)
#define w_buf	buf-(8+4*80)(SP)	// 80 words temporary buffer w uint32[80]
#define saved	abcde-(8+4*80+4*5)(SP)	// saved sha1 registers a,b,c,d,e - these must be last (unused?)
// Total size +4 for saved LR is 352

// LOAD reads the next message word, appends it to w and adds it to e.
//
// w[i] = p[j]<<24 | p[j+1]<<16 | p[j+2]<<8 | p[j+3]
// e += w[i]
//
// The word is assembled from four single-byte loads. The last byte load
// (MOVBU.P) advances Rdata by 4 and the store (MOVW.P) advances Rw by 4.
// Clobbers Rt0, Rt1 and Rt2.
#define LOAD(Re) \
	MOVBU	2(Rdata), Rt0 ; \
	MOVBU	3(Rdata), Rt1 ; \
	MOVBU	1(Rdata), Rt2 ; \
	ORR	Rt0<<8, Rt1, Rt0 ; \
	MOVBU.P	4(Rdata), Rt1 ; \
	ORR	Rt2<<16, Rt0, Rt0 ; \
	ORR	Rt1<<24, Rt0, Rt0 ; \
	MOVW.P	Rt0, 4(Rw) ; \
	ADD	Rt0, Re, Re

// SHUFFLE computes the next schedule word from earlier ones, appends it to w
// and adds it to e.
//
// tmp := w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]
// w[i] = tmp<<1 | tmp>>(32-1)
// e += w[i]
//
// Rw points at w[i], so the earlier words are read at negative word offsets
// from Rw. The indices are not masked: Rw walks linearly through the 80-word
// w buffer and is advanced by 4 by the final store (MOVW.P).
// Clobbers Rt0, Rt1 and Rt2.
#define SHUFFLE(Re) \
	MOVW	(-16*4)(Rw), Rt0 ; \
	MOVW	(-14*4)(Rw), Rt1 ; \
	MOVW	(-8*4)(Rw), Rt2 ; \
	EOR	Rt0, Rt1, Rt0 ; \
	MOVW	(-3*4)(Rw), Rt1 ; \
	EOR	Rt2, Rt0, Rt0 ; \
	EOR	Rt0, Rt1, Rt0 ; \
	MOVW	Rt0@>(32-1), Rt0 ; \
	MOVW.P	Rt0, 4(Rw) ; \
	ADD	Rt0, Re, Re

// FUNC1..FUNC4 compute the per-round function of b, c, d and leave it in Rt1.
// Ra and Re are accepted only so that every macro has the same argument list.

// t1 = (b & c) | ((~b) & d)
// Clobbers Rt0.
#define FUNC1(Ra, Rb, Rc, Rd, Re) \
	MVN	Rb, Rt1 ; \
	AND	Rb, Rc, Rt0 ; \
	AND	Rd, Rt1, Rt1 ; \
	ORR	Rt0, Rt1, Rt1

// t1 = b ^ c ^ d
// Does not touch Rt0.
#define FUNC2(Ra, Rb, Rc, Rd, Re) \
	EOR	Rb, Rc, Rt1 ; \
	EOR	Rd, Rt1, Rt1

// t1 = (b & c) | (b & d) | (c & d) =
// t1 = (b & c) | ((b | c) & d)
// Clobbers Rt0.
#define FUNC3(Ra, Rb, Rc, Rd, Re) \
	ORR	Rb, Rc, Rt0 ; \
	AND	Rb, Rc, Rt1 ; \
	AND	Rd, Rt0, Rt0 ; \
	ORR	Rt0, Rt1, Rt1

// Rounds of type 4 use the same function as rounds of type 2.
#define FUNC4 FUNC2

// MIX folds the round function result (Rt1), a rotated left by 5 and the
// round constant into e, and rotates b left by 30 in place.
//
// a5 := a<<5 | a>>(32-5)
// b = b<<30 | b>>(32-30)
// e = a5 + t1 + e + const
//
// @> is a rotate right, so @>(32-n) is a rotate left by n.
// On entry e already includes w[i], which LOAD or SHUFFLE added earlier in
// the round. Rc and Rd are not used here.
#define MIX(Ra, Rb, Rc, Rd, Re) \
	ADD	Rt1, Re, Re ; \
	MOVW	Rb@>(32-30), Rb ; \
	ADD	Ra@>(32-5), Re, Re ; \
	ADD	Rconst, Re, Re

// A full round is: obtain w[i] (LOAD or SHUFFLE), compute the round function,
// then MIX. The caller rotates the argument list by one position per round
// instead of moving values between registers.

// ROUND1: type 1 round that loads w[i] from the input data.
#define ROUND1(Ra, Rb, Rc, Rd, Re) \
	LOAD(Re) ; \
	FUNC1(Ra, Rb, Rc, Rd, Re) ; \
	MIX(Ra, Rb, Rc, Rd, Re)

// ROUND1x: type 1 round that derives w[i] from earlier schedule words.
#define ROUND1x(Ra, Rb, Rc, Rd, Re) \
	SHUFFLE(Re) ; \
	FUNC1(Ra, Rb, Rc, Rd, Re) ; \
	MIX(Ra, Rb, Rc, Rd, Re)

// ROUND2: type 2 round.
#define ROUND2(Ra, Rb, Rc, Rd, Re) \
	SHUFFLE(Re) ; \
	FUNC2(Ra, Rb, Rc, Rd, Re) ; \
	MIX(Ra, Rb, Rc, Rd, Re)

// ROUND3: type 3 round.
#define ROUND3(Ra, Rb, Rc, Rd, Re) \
	SHUFFLE(Re) ; \
	FUNC3(Ra, Rb, Rc, Rd, Re) ; \
	MIX(Ra, Rb, Rc, Rd, Re)

// ROUND4: type 4 round.
#define ROUND4(Ra, Rb, Rc, Rd, Re) \
	SHUFFLE(Re) ; \
	FUNC4(Ra, Rb, Rc, Rd, Re) ; \
	MIX(Ra, Rb, Rc, Rd, Re)


// func block(dig *digest, p []byte)
//
// block runs the 80 SHA1 rounds over p, one iteration of the outer loop per
// 64 bytes of input (16 LOADs of 4 bytes each), and writes the updated five
// accumulator words back through dig.
//
// NOTE(review): the outer loop is bottom-tested (the comparison against p_end
// happens after a block has been processed), so one block is consumed even
// when p.len is 0, and a trailing partial block would be read past p_end.
// This presumably relies on the caller always passing a non-empty multiple
// of 64 bytes; confirm against the Go caller.
TEXT	·block(SB), 0, $352-16
	MOVW	p+4(FP), Rdata	// pointer to the data
	MOVW	p_len+8(FP), Rt0	// number of bytes
	ADD	Rdata, Rt0
	MOVW	Rt0, p_end	// pointer to end of data

	// Load up initial SHA1 accumulator
	// (five consecutive words starting at dig; assumes these are the
	// hash state words at the start of digest, TODO confirm)
	MOVW	dig+0(FP), Rt0
	MOVM.IA	(Rt0), [Ra,Rb,Rc,Rd,Re]

loop:
	// Save registers at SP+4 onwards
	MOVM.IB	[Ra,Rb,Rc,Rd,Re], (R13)

	// Rounds 0-14: three passes of five data-loading rounds. After each
	// group of five the register roles are back in their starting order.
	MOVW	$w_buf, Rw
	MOVW	$0x5A827999, Rconst
	MOVW	$3, Rctr
loop1:	ROUND1(Ra, Rb, Rc, Rd, Re)
	ROUND1(Re, Ra, Rb, Rc, Rd)
	ROUND1(Rd, Re, Ra, Rb, Rc)
	ROUND1(Rc, Rd, Re, Ra, Rb)
	ROUND1(Rb, Rc, Rd, Re, Ra)
	SUB.S	$1, Rctr
	BNE	loop1

	// Round 15 loads the last data word; rounds 16-19 are still type 1
	// but take their words from the schedule.
	ROUND1(Ra, Rb, Rc, Rd, Re)
	ROUND1x(Re, Ra, Rb, Rc, Rd)
	ROUND1x(Rd, Re, Ra, Rb, Rc)
	ROUND1x(Rc, Rd, Re, Ra, Rb)
	ROUND1x(Rb, Rc, Rd, Re, Ra)

	// Rounds 20-39: four passes of five type 2 rounds.
	MOVW	$0x6ED9EBA1, Rconst
	MOVW	$4, Rctr
loop2:	ROUND2(Ra, Rb, Rc, Rd, Re)
	ROUND2(Re, Ra, Rb, Rc, Rd)
	ROUND2(Rd, Re, Ra, Rb, Rc)
	ROUND2(Rc, Rd, Re, Ra, Rb)
	ROUND2(Rb, Rc, Rd, Re, Ra)
	SUB.S	$1, Rctr
	BNE	loop2

	// Rounds 40-59: four passes of five type 3 rounds.
	MOVW	$0x8F1BBCDC, Rconst
	MOVW	$4, Rctr
loop3:	ROUND3(Ra, Rb, Rc, Rd, Re)
	ROUND3(Re, Ra, Rb, Rc, Rd)
	ROUND3(Rd, Re, Ra, Rb, Rc)
	ROUND3(Rc, Rd, Re, Ra, Rb)
	ROUND3(Rb, Rc, Rd, Re, Ra)
	SUB.S	$1, Rctr
	BNE	loop3

	// Rounds 60-79: four passes of five type 4 rounds.
	MOVW	$0xCA62C1D6, Rconst
	MOVW	$4, Rctr
loop4:	ROUND4(Ra, Rb, Rc, Rd, Re)
	ROUND4(Re, Ra, Rb, Rc, Rd)
	ROUND4(Rd, Re, Ra, Rb, Rc)
	ROUND4(Rc, Rd, Re, Ra, Rb)
	ROUND4(Rb, Rc, Rd, Re, Ra)
	SUB.S	$1, Rctr
	BNE	loop4

	// Accumulate - restoring registers from SP+4
	// The values of a, b, c, d, e saved at the top of the loop are read
	// back into Rt0, Rt1, Rt2, Rctr, Rw (all free once the rounds are
	// done) and added to the new a, b, c, d, e.
	MOVM.IB	(R13), [Rt0,Rt1,Rt2,Rctr,Rw]
	ADD	Rt0, Ra
	ADD	Rt1, Rb
	ADD	Rt2, Rc
	ADD	Rctr, Rd
	ADD	Rw, Re

	// Continue while the data pointer is still below the end of the input.
	MOVW	p_end, Rt0
	CMP	Rt0, Rdata
	BLO	loop

	// Save final SHA1 accumulator
	MOVW	dig+0(FP), Rt0
	MOVM.IA	[Ra,Rb,Rc,Rd,Re], (Rt0)

	RET