// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
// ARM version of md5block.go

//go:build !purego

#include "textflag.h"

// Register definitions
#define Rtable	R0	// Pointer to MD5 constants table
#define Rdata	R1	// Pointer to data to hash
#define Ra	R2	// MD5 accumulator
#define Rb	R3	// MD5 accumulator
#define Rc	R4	// MD5 accumulator
#define Rd	R5	// MD5 accumulator
#define Rc0	R6	// MD5 constant
#define Rc1	R7	// MD5 constant
#define Rc2	R8	// MD5 constant
// r9, r10 are forbidden
// r11 is OK provided you check the assembler that no synthetic instructions use it
#define Rc3	R11	// MD5 constant
#define Rt0	R12	// temporary
#define Rt1	R14	// temporary

// func block(dig *digest, p []byte)
//
// Folds every complete 64-byte block of p into the four accumulator words
// stored at the start of *dig. The byte count is rounded down to a multiple
// of 64; when no complete block is available the routine returns without
// reading p or touching *dig.
//
// Arguments:
//	 0(FP) is *digest
//	 4(FP) is p.array (struct Slice)
//	 8(FP) is p.len
//	12(FP) is p.cap (not used)
//
// Stack frame (84 bytes):
//	 4 bytes  p_end  - pointer one past the last byte to hash
//	 4 bytes  p_data - saved source pointer for the current block
//	64 bytes  buf    - word-aligned copy of a block whose source is unaligned
//	12 bytes  at 4..12(R13) - outgoing arguments for runtime·memmove
#define p_end	end-4(SP)		// pointer to the end of data
#define p_data	data-8(SP)		// current data pointer
#define buf	buffer-(8+4*16)(SP)	// 16 words temporary buffer
					// 3 words at 4..12(R13) for called routine parameters

TEXT	·block(SB), NOSPLIT, $84-16
	MOVW	p+4(FP), Rdata		// pointer to the data
	MOVW	p_len+8(FP), Rt0	// number of bytes

	// The loop below tests its bound only after a block has been hashed,
	// so an empty or partial trailing block must be excluded up front;
	// otherwise it would read past the end of p.
	BIC	$63, Rt0		// keep whole 64-byte blocks only
	CMP	$0, Rt0
	BEQ	done			// nothing to hash

	ADD	Rdata, Rt0
	MOVW	Rt0, p_end		// pointer to end of data

loop:
	MOVW	Rdata, p_data		// Save Rdata
	AND.S	$3, Rdata, Rt0		// TST $3, Rdata not working see issue 5921
	BEQ	aligned			// aligned detected - skip copy

	// Copy the unaligned source data into the aligned temporary buffer
	// memmove(to=4(R13), from=8(R13), n=12(R13)) - Corrupts all registers
	MOVW	$buf, Rtable		// to
	MOVW	$64, Rc0		// n
	MOVM.IB	[Rtable,Rdata,Rc0], (R13)
	BL	runtime·memmove(SB)

	// Point to the local aligned copy of the data
	MOVW	$buf, Rdata

aligned:
	// Point to the table of constants
	// A PC relative add would be cheaper than this
	MOVW	$·table(SB), Rtable

	// Load up initial MD5 accumulator
	MOVW	dig+0(FP), Rc0
	MOVM.IA	(Rc0), [Ra,Rb,Rc,Rd]

// Each group of four steps below first pulls the next four constants from
// the table with a write-back load, so Rtable advances 16 bytes per group.

// a += (((c^d)&b)^d) + X[index] + const
// a = a<<shift | a>>(32-shift) + b
#define ROUND1(Ra, Rb, Rc, Rd, index, shift, Rconst) \
	EOR	Rc, Rd, Rt0		; \
	AND	Rb, Rt0			; \
	EOR	Rd, Rt0			; \
	MOVW	(index<<2)(Rdata), Rt1	; \
	ADD	Rt1, Rt0		; \
	ADD	Rconst, Rt0		; \
	ADD	Rt0, Ra			; \
	ADD	Ra@>(32-shift), Rb, Ra	;

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND1(Ra, Rb, Rc, Rd,  0,  7, Rc0)
	ROUND1(Rd, Ra, Rb, Rc,  1, 12, Rc1)
	ROUND1(Rc, Rd, Ra, Rb,  2, 17, Rc2)
	ROUND1(Rb, Rc, Rd, Ra,  3, 22, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND1(Ra, Rb, Rc, Rd,  4,  7, Rc0)
	ROUND1(Rd, Ra, Rb, Rc,  5, 12, Rc1)
	ROUND1(Rc, Rd, Ra, Rb,  6, 17, Rc2)
	ROUND1(Rb, Rc, Rd, Ra,  7, 22, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND1(Ra, Rb, Rc, Rd,  8,  7, Rc0)
	ROUND1(Rd, Ra, Rb, Rc,  9, 12, Rc1)
	ROUND1(Rc, Rd, Ra, Rb, 10, 17, Rc2)
	ROUND1(Rb, Rc, Rd, Ra, 11, 22, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND1(Ra, Rb, Rc, Rd, 12,  7, Rc0)
	ROUND1(Rd, Ra, Rb, Rc, 13, 12, Rc1)
	ROUND1(Rc, Rd, Ra, Rb, 14, 17, Rc2)
	ROUND1(Rb, Rc, Rd, Ra, 15, 22, Rc3)

// a += (((b^c)&d)^c) + X[index] + const
// a = a<<shift | a>>(32-shift) + b
#define ROUND2(Ra, Rb, Rc, Rd, index, shift, Rconst) \
	EOR	Rb, Rc, Rt0		; \
	AND	Rd, Rt0			; \
	EOR	Rc, Rt0			; \
	MOVW	(index<<2)(Rdata), Rt1	; \
	ADD	Rt1, Rt0		; \
	ADD	Rconst, Rt0		; \
	ADD	Rt0, Ra			; \
	ADD	Ra@>(32-shift), Rb, Ra	;

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND2(Ra, Rb, Rc, Rd,  1,  5, Rc0)
	ROUND2(Rd, Ra, Rb, Rc,  6,  9, Rc1)
	ROUND2(Rc, Rd, Ra, Rb, 11, 14, Rc2)
	ROUND2(Rb, Rc, Rd, Ra,  0, 20, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND2(Ra, Rb, Rc, Rd,  5,  5, Rc0)
	ROUND2(Rd, Ra, Rb, Rc, 10,  9, Rc1)
	ROUND2(Rc, Rd, Ra, Rb, 15, 14, Rc2)
	ROUND2(Rb, Rc, Rd, Ra,  4, 20, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND2(Ra, Rb, Rc, Rd,  9,  5, Rc0)
	ROUND2(Rd, Ra, Rb, Rc, 14,  9, Rc1)
	ROUND2(Rc, Rd, Ra, Rb,  3, 14, Rc2)
	ROUND2(Rb, Rc, Rd, Ra,  8, 20, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND2(Ra, Rb, Rc, Rd, 13,  5, Rc0)
	ROUND2(Rd, Ra, Rb, Rc,  2,  9, Rc1)
	ROUND2(Rc, Rd, Ra, Rb,  7, 14, Rc2)
	ROUND2(Rb, Rc, Rd, Ra, 12, 20, Rc3)

// a += (b^c^d) + X[index] + const
// a = a<<shift | a>>(32-shift) + b
#define ROUND3(Ra, Rb, Rc, Rd, index, shift, Rconst) \
	EOR	Rb, Rc, Rt0		; \
	EOR	Rd, Rt0			; \
	MOVW	(index<<2)(Rdata), Rt1	; \
	ADD	Rt1, Rt0		; \
	ADD	Rconst, Rt0		; \
	ADD	Rt0, Ra			; \
	ADD	Ra@>(32-shift), Rb, Ra	;

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND3(Ra, Rb, Rc, Rd,  5,  4, Rc0)
	ROUND3(Rd, Ra, Rb, Rc,  8, 11, Rc1)
	ROUND3(Rc, Rd, Ra, Rb, 11, 16, Rc2)
	ROUND3(Rb, Rc, Rd, Ra, 14, 23, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND3(Ra, Rb, Rc, Rd,  1,  4, Rc0)
	ROUND3(Rd, Ra, Rb, Rc,  4, 11, Rc1)
	ROUND3(Rc, Rd, Ra, Rb,  7, 16, Rc2)
	ROUND3(Rb, Rc, Rd, Ra, 10, 23, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND3(Ra, Rb, Rc, Rd, 13,  4, Rc0)
	ROUND3(Rd, Ra, Rb, Rc,  0, 11, Rc1)
	ROUND3(Rc, Rd, Ra, Rb,  3, 16, Rc2)
	ROUND3(Rb, Rc, Rd, Ra,  6, 23, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND3(Ra, Rb, Rc, Rd,  9,  4, Rc0)
	ROUND3(Rd, Ra, Rb, Rc, 12, 11, Rc1)
	ROUND3(Rc, Rd, Ra, Rb, 15, 16, Rc2)
	ROUND3(Rb, Rc, Rd, Ra,  2, 23, Rc3)

// a += (c^(b|^d)) + X[index] + const
// a = a<<shift | a>>(32-shift) + b
#define ROUND4(Ra, Rb, Rc, Rd, index, shift, Rconst) \
	MVN	Rd, Rt0			; \
	ORR	Rb, Rt0			; \
	EOR	Rc, Rt0			; \
	MOVW	(index<<2)(Rdata), Rt1	; \
	ADD	Rt1, Rt0		; \
	ADD	Rconst, Rt0		; \
	ADD	Rt0, Ra			; \
	ADD	Ra@>(32-shift), Rb, Ra	;

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND4(Ra, Rb, Rc, Rd,  0,  6, Rc0)
	ROUND4(Rd, Ra, Rb, Rc,  7, 10, Rc1)
	ROUND4(Rc, Rd, Ra, Rb, 14, 15, Rc2)
	ROUND4(Rb, Rc, Rd, Ra,  5, 21, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND4(Ra, Rb, Rc, Rd, 12,  6, Rc0)
	ROUND4(Rd, Ra, Rb, Rc,  3, 10, Rc1)
	ROUND4(Rc, Rd, Ra, Rb, 10, 15, Rc2)
	ROUND4(Rb, Rc, Rd, Ra,  1, 21, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND4(Ra, Rb, Rc, Rd,  8,  6, Rc0)
	ROUND4(Rd, Ra, Rb, Rc, 15, 10, Rc1)
	ROUND4(Rc, Rd, Ra, Rb,  6, 15, Rc2)
	ROUND4(Rb, Rc, Rd, Ra, 13, 21, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND4(Ra, Rb, Rc, Rd,  4,  6, Rc0)
	ROUND4(Rd, Ra, Rb, Rc, 11, 10, Rc1)
	ROUND4(Rc, Rd, Ra, Rb,  2, 15, Rc2)
	ROUND4(Rb, Rc, Rd, Ra,  9, 21, Rc3)

	// Add the state this block started from back into the working
	// values and store the sums to *dig.
	MOVW	dig+0(FP), Rt0
	MOVM.IA	(Rt0), [Rc0,Rc1,Rc2,Rc3]

	ADD	Rc0, Ra
	ADD	Rc1, Rb
	ADD	Rc2, Rc
	ADD	Rc3, Rd

	MOVM.IA	[Ra,Rb,Rc,Rd], (Rt0)

	// Rdata may be pointing at buf; reload the real source pointer
	// before stepping to the next block.
	MOVW	p_data, Rdata
	MOVW	p_end, Rt0
	ADD	$64, Rdata
	CMP	Rt0, Rdata
	BLO	loop

done:
	RET

// MD5 constants table

	// Round 1
	DATA	·table+0x00(SB)/4, $0xd76aa478
	DATA	·table+0x04(SB)/4, $0xe8c7b756
	DATA	·table+0x08(SB)/4, $0x242070db
	DATA	·table+0x0c(SB)/4, $0xc1bdceee
	DATA	·table+0x10(SB)/4, $0xf57c0faf
	DATA	·table+0x14(SB)/4, $0x4787c62a
	DATA	·table+0x18(SB)/4, $0xa8304613
	DATA	·table+0x1c(SB)/4, $0xfd469501
	DATA	·table+0x20(SB)/4, $0x698098d8
	DATA	·table+0x24(SB)/4, $0x8b44f7af
	DATA	·table+0x28(SB)/4, $0xffff5bb1
	DATA	·table+0x2c(SB)/4, $0x895cd7be
	DATA	·table+0x30(SB)/4, $0x6b901122
	DATA	·table+0x34(SB)/4, $0xfd987193
	DATA	·table+0x38(SB)/4, $0xa679438e
	DATA	·table+0x3c(SB)/4, $0x49b40821
	// Round 2
	DATA	·table+0x40(SB)/4, $0xf61e2562
	DATA	·table+0x44(SB)/4, $0xc040b340
	DATA	·table+0x48(SB)/4, $0x265e5a51
	DATA	·table+0x4c(SB)/4, $0xe9b6c7aa
	DATA	·table+0x50(SB)/4, $0xd62f105d
	DATA	·table+0x54(SB)/4, $0x02441453
	DATA	·table+0x58(SB)/4, $0xd8a1e681
	DATA	·table+0x5c(SB)/4, $0xe7d3fbc8
	DATA	·table+0x60(SB)/4, $0x21e1cde6
	DATA	·table+0x64(SB)/4, $0xc33707d6
	DATA	·table+0x68(SB)/4, $0xf4d50d87
	DATA	·table+0x6c(SB)/4, $0x455a14ed
	DATA	·table+0x70(SB)/4, $0xa9e3e905
	DATA	·table+0x74(SB)/4, $0xfcefa3f8
	DATA	·table+0x78(SB)/4, $0x676f02d9
	DATA	·table+0x7c(SB)/4, $0x8d2a4c8a
	// Round 3
	DATA	·table+0x80(SB)/4, $0xfffa3942
	DATA	·table+0x84(SB)/4, $0x8771f681
	DATA	·table+0x88(SB)/4, $0x6d9d6122
	DATA	·table+0x8c(SB)/4, $0xfde5380c
	DATA	·table+0x90(SB)/4, $0xa4beea44
	DATA	·table+0x94(SB)/4, $0x4bdecfa9
	DATA	·table+0x98(SB)/4, $0xf6bb4b60
	DATA	·table+0x9c(SB)/4, $0xbebfbc70
	DATA	·table+0xa0(SB)/4, $0x289b7ec6
	DATA	·table+0xa4(SB)/4, $0xeaa127fa
	DATA	·table+0xa8(SB)/4, $0xd4ef3085
	DATA	·table+0xac(SB)/4, $0x04881d05
	DATA	·table+0xb0(SB)/4, $0xd9d4d039
	DATA	·table+0xb4(SB)/4, $0xe6db99e5
	DATA	·table+0xb8(SB)/4, $0x1fa27cf8
	DATA	·table+0xbc(SB)/4, $0xc4ac5665
	// Round 4
	DATA	·table+0xc0(SB)/4, $0xf4292244
	DATA	·table+0xc4(SB)/4, $0x432aff97
	DATA	·table+0xc8(SB)/4, $0xab9423a7
	DATA	·table+0xcc(SB)/4, $0xfc93a039
	DATA	·table+0xd0(SB)/4, $0x655b59c3
	DATA	·table+0xd4(SB)/4, $0x8f0ccc92
	DATA	·table+0xd8(SB)/4, $0xffeff47d
	DATA	·table+0xdc(SB)/4, $0x85845dd1
	DATA	·table+0xe0(SB)/4, $0x6fa87e4f
	DATA	·table+0xe4(SB)/4, $0xfe2ce6e0
	DATA	·table+0xe8(SB)/4, $0xa3014314
	DATA	·table+0xec(SB)/4, $0x4e0811a1
	DATA	·table+0xf0(SB)/4, $0xf7537e82
	DATA	·table+0xf4(SB)/4, $0xbd3af235
	DATA	·table+0xf8(SB)/4, $0x2ad7d2bb
	DATA	·table+0xfc(SB)/4, $0xeb86d391
	// Global definition: 64 words, read-only (RODATA comes from textflag.h)
	GLOBL	·table(SB), RODATA, $256