// Source: github.com/ice-blockchain/go/src@v0.0.0-20240403114104-1564d284e521/crypto/sha1/sha1block_arm.s

// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
// ARM version of sha1block.go

//go:build !purego

#include "textflag.h"

// SHA-1 block routine. See sha1block.go for Go equivalent.
//
// There are 80 rounds of 4 types:
//   - rounds 0-15 are type 1 and load data (ROUND1 macro).
//   - rounds 16-19 are type 1 and do not load data (ROUND1x macro).
//   - rounds 20-39 are type 2 and do not load data (ROUND2 macro).
//   - rounds 40-59 are type 3 and do not load data (ROUND3 macro).
//   - rounds 60-79 are type 4 and do not load data (ROUND4 macro).
//
// Each round loads or shuffles the data, then computes a per-round
// function of b, c, d, and then mixes the result into and rotates the
// five registers a, b, c, d, e holding the intermediate results.
//
// The register rotation is implemented by rotating the arguments to
// the round macros instead of by explicit move instructions.
// Register definitions
//
// Symbolic names for the ARM registers used by the round macros and by
// ·block. Ra..Re hold the five SHA-1 working words; Rt0..Rt2 are scratch.
#define Rdata	R0	// Pointer to incoming data
#define Rconst	R1	// Current constant for SHA round
#define Ra	R2		// SHA-1 accumulator
#define Rb	R3		// SHA-1 accumulator
#define Rc	R4		// SHA-1 accumulator
#define Rd	R5		// SHA-1 accumulator
#define Re	R6		// SHA-1 accumulator
#define Rt0	R7		// Temporary
#define Rt1	R8		// Temporary
// r9, r10 are forbidden
// r11 is OK provided you check the assembler that no synthetic instructions use it
#define Rt2	R11		// Temporary
#define Rctr	R12	// loop counter
#define Rw	R14		// point to w buffer

// func block(dig *digest, p []byte)
// 0(FP) is *digest
// 4(FP) is p.array (struct Slice)
// 8(FP) is p.len
//12(FP) is p.cap
//
// Stack frame
//
// p_data and saved are not referenced by any instruction visible in this
// file (hence the "unused?" notes); the a..e save area is addressed
// directly through R13 instead.
#define p_end	end-4(SP)		// pointer to the end of data
#define p_data	data-8(SP)	// current data pointer (unused?)
#define w_buf	buf-(8+4*80)(SP)	//80 words temporary buffer w uint32[80]
#define saved	abcde-(8+4*80+4*5)(SP)	// saved sha1 registers a,b,c,d,e - these must be last (unused?)
// Total size +4 for saved LR is 352

// LOAD reads the next four message bytes at Rdata as one big-endian word,
// appends it to the w buffer and adds it into the given accumulator:
//	w[i] = p[j]<<24 | p[j+1]<<16 | p[j+2]<<8 | p[j+3]
//	e += w[i]
// Rdata and Rw are both post-incremented by 4 (the .P suffix).
// Clobbers Rt0, Rt1, Rt2.
#define LOAD(Re) \
	MOVBU	2(Rdata), Rt0 ; \
	MOVBU	3(Rdata), Rt1 ; \
	MOVBU	1(Rdata), Rt2 ; \
	ORR	Rt0<<8, Rt1, Rt0 ; \
	MOVBU.P	4(Rdata), Rt1 ; \
	ORR	Rt2<<16, Rt0, Rt0 ; \
	ORR	Rt1<<24, Rt0, Rt0 ; \
	MOVW.P	Rt0, 4(Rw) ; \
	ADD	Rt0, Re, Re

// SHUFFLE computes the next message-schedule word from earlier ones:
//	tmp := w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf] ^ w[(i)&0xf]
//	w[i&0xf] = tmp<<1 | tmp>>(32-1)
//	e += w[i&0xf]
// The &0xf indexing above mirrors a 16-word ring buffer. This code instead
// reads the words 16, 14, 8 and 3 slots behind Rw in the linear 80-word
// w buffer and writes the result at Rw, post-incrementing Rw by 4.
// The rotate-left by 1 is expressed as a rotate-right by 32-1.
// Clobbers Rt0, Rt1, Rt2.
#define SHUFFLE(Re) \
	MOVW	(-16*4)(Rw), Rt0 ; \
	MOVW	(-14*4)(Rw), Rt1 ; \
	MOVW	(-8*4)(Rw), Rt2 ; \
	EOR	Rt0, Rt1, Rt0 ; \
	MOVW	(-3*4)(Rw), Rt1 ; \
	EOR	Rt2, Rt0, Rt0 ; \
	EOR	Rt0, Rt1, Rt0 ; \
	MOVW	Rt0@>(32-1), Rt0 ; \
	MOVW.P	Rt0, 4(Rw) ; \
	ADD	Rt0, Re, Re

// FUNC1 leaves the round function in Rt1 (Rt0 is scratch):
// t1 = (b & c) | ((~b) & d)
#define FUNC1(Ra, Rb, Rc, Rd, Re) \
	MVN	Rb, Rt1 ; \
	AND	Rb, Rc, Rt0 ; \
	AND	Rd, Rt1, Rt1 ; \
	ORR	Rt0, Rt1, Rt1

// FUNC2 leaves the round function in Rt1:
// t1 = b ^ c ^ d
#define FUNC2(Ra, Rb, Rc, Rd, Re) \
	EOR	Rb, Rc, Rt1 ; \
	EOR	Rd, Rt1, Rt1

// FUNC3 leaves the round function in Rt1 (Rt0 is scratch):
// t1 = (b & c) | (b & d) | (c & d) =
// t1 = (b & c) | ((b | c) & d)
#define FUNC3(Ra, Rb, Rc, Rd, Re) \
	ORR	Rb, Rc, Rt0 ; \
	AND	Rb, Rc, Rt1 ; \
	AND	Rd, Rt0, Rt0 ; \
	ORR	Rt0, Rt1, Rt1

// Rounds of type 4 use the same function as type 2.
#define FUNC4 FUNC2

// MIX folds the round function (Rt1), the rotated a and the round constant
// into e, and rotates b in place. Left rotations are written as right
// rotations by 32-n.
// a5 := a<<5 | a>>(32-5)
// b = b<<30 | b>>(32-30)
// e = a5 + t1 + e + const
#define MIX(Ra, Rb, Rc, Rd, Re) \
	ADD	Rt1, Re, Re ; \
	MOVW	Rb@>(32-30), Rb ; \
	ADD	Ra@>(32-5), Re, Re ; \
	ADD	Rconst, Re, Re

// ROUND1: type 1 round that loads a fresh message word.
#define ROUND1(Ra, Rb, Rc, Rd, Re) \
	LOAD(Re) ; \
	FUNC1(Ra, Rb, Rc, Rd, Re) ; \
	MIX(Ra, Rb, Rc, Rd, Re)

// ROUND1x: type 1 round that derives its word from the schedule.
#define ROUND1x(Ra, Rb, Rc, Rd, Re) \
	SHUFFLE(Re) ; \
	FUNC1(Ra, Rb, Rc, Rd, Re) ; \
	MIX(Ra, Rb, Rc, Rd, Re)

// ROUND2: type 2 round.
#define ROUND2(Ra, Rb, Rc, Rd, Re) \
	SHUFFLE(Re) ; \
	FUNC2(Ra, Rb, Rc, Rd, Re) ; \
	MIX(Ra, Rb, Rc, Rd, Re)

// ROUND3: type 3 round.
#define ROUND3(Ra, Rb, Rc, Rd, Re) \
	SHUFFLE(Re) ; \
	FUNC3(Ra, Rb, Rc, Rd, Re) ; \
	MIX(Ra, Rb, Rc, Rd, Re)

// ROUND4: type 4 round.
#define ROUND4(Ra, Rb, Rc, Rd, Re) \
	SHUFFLE(Re) ; \
	FUNC4(Ra, Rb, Rc, Rd, Re) ; \
	MIX(Ra, Rb, Rc, Rd, Re)


// func block(dig *digest, p []byte)
//
// Loads five words from dig into Ra..Re, runs 80 rounds per outer-loop
// iteration (each iteration consumes 16 words = 64 bytes of p via LOAD),
// adds the pre-iteration values back in, and finally stores Ra..Re to dig.
//
// NOTE(review): the end-of-data test sits at the bottom of the outer loop, so
// one 64-byte block is always consumed before p.len is examined. Presumably
// callers only pass a non-empty whole number of 64-byte blocks - confirm.
TEXT	·block(SB), 0, $352-16
	MOVW	p+4(FP), Rdata	// pointer to the data
	MOVW	p_len+8(FP), Rt0	// number of bytes
	ADD	Rdata, Rt0
	MOVW	Rt0, p_end	// pointer to end of data

	// Load up initial SHA-1 accumulator
	MOVW	dig+0(FP), Rt0
	MOVM.IA (Rt0), [Ra,Rb,Rc,Rd,Re]

loop:
	// Save registers at SP+4 onwards
	MOVM.IB [Ra,Rb,Rc,Rd,Re], (R13)

	// Rounds 0-14: three passes of five data-loading rounds.
	MOVW	$w_buf, Rw
	MOVW	$0x5A827999, Rconst
	MOVW	$3, Rctr
loop1:	ROUND1(Ra, Rb, Rc, Rd, Re)
	ROUND1(Re, Ra, Rb, Rc, Rd)
	ROUND1(Rd, Re, Ra, Rb, Rc)
	ROUND1(Rc, Rd, Re, Ra, Rb)
	ROUND1(Rb, Rc, Rd, Re, Ra)
	SUB.S	$1, Rctr
	BNE	loop1

	// Round 15 loads the last data word; rounds 16-19 use the schedule.
	ROUND1(Ra, Rb, Rc, Rd, Re)
	ROUND1x(Re, Ra, Rb, Rc, Rd)
	ROUND1x(Rd, Re, Ra, Rb, Rc)
	ROUND1x(Rc, Rd, Re, Ra, Rb)
	ROUND1x(Rb, Rc, Rd, Re, Ra)

	// Rounds 20-39: four passes of five type 2 rounds.
	MOVW	$0x6ED9EBA1, Rconst
	MOVW	$4, Rctr
loop2:	ROUND2(Ra, Rb, Rc, Rd, Re)
	ROUND2(Re, Ra, Rb, Rc, Rd)
	ROUND2(Rd, Re, Ra, Rb, Rc)
	ROUND2(Rc, Rd, Re, Ra, Rb)
	ROUND2(Rb, Rc, Rd, Re, Ra)
	SUB.S	$1, Rctr
	BNE	loop2

	// Rounds 40-59: four passes of five type 3 rounds.
	MOVW	$0x8F1BBCDC, Rconst
	MOVW	$4, Rctr
loop3:	ROUND3(Ra, Rb, Rc, Rd, Re)
	ROUND3(Re, Ra, Rb, Rc, Rd)
	ROUND3(Rd, Re, Ra, Rb, Rc)
	ROUND3(Rc, Rd, Re, Ra, Rb)
	ROUND3(Rb, Rc, Rd, Re, Ra)
	SUB.S	$1, Rctr
	BNE	loop3

	// Rounds 60-79: four passes of five type 4 rounds.
	MOVW	$0xCA62C1D6, Rconst
	MOVW	$4, Rctr
loop4:	ROUND4(Ra, Rb, Rc, Rd, Re)
	ROUND4(Re, Ra, Rb, Rc, Rd)
	ROUND4(Rd, Re, Ra, Rb, Rc)
	ROUND4(Rc, Rd, Re, Ra, Rb)
	ROUND4(Rb, Rc, Rd, Re, Ra)
	SUB.S	$1, Rctr
	BNE	loop4

	// Accumulate - restoring registers from SP+4
	// (Rctr and Rw are free at this point and serve as extra temporaries.)
	MOVM.IB (R13), [Rt0,Rt1,Rt2,Rctr,Rw]
	ADD	Rt0, Ra
	ADD	Rt1, Rb
	ADD	Rt2, Rc
	ADD	Rctr, Rd
	ADD	Rw, Re

	// Continue while the data pointer is still below the end pointer.
	MOVW	p_end, Rt0
	CMP	Rt0, Rdata
	BLO	loop

	// Save final SHA-1 accumulator
	MOVW	dig+0(FP), Rt0
	MOVM.IA [Ra,Rb,Rc,Rd,Re], (Rt0)

	RET