// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
// ARM version of md5block.go

#include "textflag.h"

// Register definitions
#define Rtable	R0	// Pointer to MD5 constants table
#define Rdata	R1	// Pointer to data to hash
#define Ra	R2	// MD5 accumulator
#define Rb	R3	// MD5 accumulator
#define Rc	R4	// MD5 accumulator
#define Rd	R5	// MD5 accumulator
#define Rc0	R6	// MD5 constant
#define Rc1	R7	// MD5 constant
#define Rc2	R8	// MD5 constant
// r9, r10 are forbidden
// r11 is OK provided you check the assembler that no synthetic instructions use it
#define Rc3	R11	// MD5 constant
#define Rt0	R12	// temporary
#define Rt1	R14	// temporary

// func block(dig *digest, p []byte)
// 0(FP) is *digest
// 4(FP) is p.array (struct Slice)
// 8(FP) is p.len
//12(FP) is p.cap
//
// Stack frame (84 bytes = 4 + 4 + 16*4 + 3*4)
#define p_end	end-4(SP)	// pointer to the end of data
#define p_data	data-8(SP)	// current data pointer
#define buf	buffer-(8+4*16)(SP)	// 16 words temporary buffer
		// 3 words at 4..12(R13) for called routine parameters

// Common tail of every MD5 step. On entry Rt0 holds the value of the
// round's boolean function; Rt1 is used as scratch.
//	Racc += Rt0 + X[index] + Rconst
//	Racc = (Racc<<shift | Racc>>(32-shift)) + Rnext
// X[index] is read as a 32-bit word from Rdata, so Rdata must be word aligned.
#define MD5STEP(Racc, Rnext, index, shift, Rconst) \
	MOVW	(index<<2)(Rdata), Rt1	; \
	ADD	Rt1, Rt0		; \
	ADD	Rconst, Rt0		; \
	ADD	Rt0, Racc		; \
	ADD	Racc@>(32-shift), Rnext, Racc	;

// a += (((c^d)&b)^d) + X[index] + const
// a = a<<shift | a>>(32-shift) + b
#define ROUND1(Ra, Rb, Rc, Rd, index, shift, Rconst) \
	EOR	Rc, Rd, Rt0	; \
	AND	Rb, Rt0		; \
	EOR	Rd, Rt0		; \
	MD5STEP(Ra, Rb, index, shift, Rconst)

// a += (((b^c)&d)^c) + X[index] + const
// a = a<<shift | a>>(32-shift) + b
#define ROUND2(Ra, Rb, Rc, Rd, index, shift, Rconst) \
	EOR	Rb, Rc, Rt0	; \
	AND	Rd, Rt0		; \
	EOR	Rc, Rt0		; \
	MD5STEP(Ra, Rb, index, shift, Rconst)

// a += (b^c^d) + X[index] + const
// a = a<<shift | a>>(32-shift) + b
#define ROUND3(Ra, Rb, Rc, Rd, index, shift, Rconst) \
	EOR	Rb, Rc, Rt0	; \
	EOR	Rd, Rt0		; \
	MD5STEP(Ra, Rb, index, shift, Rconst)

// a += (c^(b|^d)) + X[index] + const
// a = a<<shift | a>>(32-shift) + b
#define ROUND4(Ra, Rb, Rc, Rd, index, shift, Rconst) \
	MVN	Rd, Rt0		; \
	ORR	Rb, Rt0		; \
	EOR	Rc, Rt0		; \
	MD5STEP(Ra, Rb, index, shift, Rconst)

// block folds every whole 64-byte chunk of p into the four accumulator
// words stored at dig. Each chunk runs through the 64 MD5 steps (four
// rounds of 16) using the constants in ·table, and the result is added
// back into the words at dig.
//
// Only len(p) rounded down to a multiple of 64 is consumed; if that is
// zero the function returns without touching dig. Source data that is
// not 4-byte aligned is first copied, one chunk at a time, into the
// aligned stack buffer buf.
TEXT	·block(SB), NOSPLIT, $84-16
	MOVW	p+4(FP), Rdata		// pointer to the data
	MOVW	p_len+8(FP), Rt0	// number of bytes
	BIC	$63, Rt0		// hash whole 64-byte blocks only
	ADD	Rdata, Rt0
	MOVW	Rt0, p_end		// pointer to end of data
	CMP	Rt0, Rdata
	BEQ	done			// no complete block - nothing to do

loop:
	MOVW	Rdata, p_data		// Save Rdata
	AND.S	$3, Rdata, Rt0		// TST $3, Rdata not working see issue 5921
	BEQ	aligned			// aligned detected - skip copy

	// Copy the unaligned source data into the aligned temporary buffer
	// memmove(to=4(R13), from=8(R13), n=12(R13)) - Corrupts all registers
	MOVW	$buf, Rtable		// to
	MOVW	$64, Rc0		// n
	MOVM.IB	[Rtable,Rdata,Rc0], (R13)
	BL	runtime·memmove(SB)

	// Point to the local aligned copy of the data
	MOVW	$buf, Rdata

aligned:
	// Point to the table of constants
	// A PC relative add would be cheaper than this
	MOVW	$·table(SB), Rtable

	// Load up initial MD5 accumulator
	MOVW	dig+0(FP), Rc0
	MOVM.IA	(Rc0), [Ra,Rb,Rc,Rd]

	// Round 1. Each MOVM.IA.W fetches the next four constants and
	// advances Rtable past them.
	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND1(Ra, Rb, Rc, Rd,  0,  7, Rc0)
	ROUND1(Rd, Ra, Rb, Rc,  1, 12, Rc1)
	ROUND1(Rc, Rd, Ra, Rb,  2, 17, Rc2)
	ROUND1(Rb, Rc, Rd, Ra,  3, 22, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND1(Ra, Rb, Rc, Rd,  4,  7, Rc0)
	ROUND1(Rd, Ra, Rb, Rc,  5, 12, Rc1)
	ROUND1(Rc, Rd, Ra, Rb,  6, 17, Rc2)
	ROUND1(Rb, Rc, Rd, Ra,  7, 22, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND1(Ra, Rb, Rc, Rd,  8,  7, Rc0)
	ROUND1(Rd, Ra, Rb, Rc,  9, 12, Rc1)
	ROUND1(Rc, Rd, Ra, Rb, 10, 17, Rc2)
	ROUND1(Rb, Rc, Rd, Ra, 11, 22, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND1(Ra, Rb, Rc, Rd, 12,  7, Rc0)
	ROUND1(Rd, Ra, Rb, Rc, 13, 12, Rc1)
	ROUND1(Rc, Rd, Ra, Rb, 14, 17, Rc2)
	ROUND1(Rb, Rc, Rd, Ra, 15, 22, Rc3)

	// Round 2
	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND2(Ra, Rb, Rc, Rd,  1,  5, Rc0)
	ROUND2(Rd, Ra, Rb, Rc,  6,  9, Rc1)
	ROUND2(Rc, Rd, Ra, Rb, 11, 14, Rc2)
	ROUND2(Rb, Rc, Rd, Ra,  0, 20, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND2(Ra, Rb, Rc, Rd,  5,  5, Rc0)
	ROUND2(Rd, Ra, Rb, Rc, 10,  9, Rc1)
	ROUND2(Rc, Rd, Ra, Rb, 15, 14, Rc2)
	ROUND2(Rb, Rc, Rd, Ra,  4, 20, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND2(Ra, Rb, Rc, Rd,  9,  5, Rc0)
	ROUND2(Rd, Ra, Rb, Rc, 14,  9, Rc1)
	ROUND2(Rc, Rd, Ra, Rb,  3, 14, Rc2)
	ROUND2(Rb, Rc, Rd, Ra,  8, 20, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND2(Ra, Rb, Rc, Rd, 13,  5, Rc0)
	ROUND2(Rd, Ra, Rb, Rc,  2,  9, Rc1)
	ROUND2(Rc, Rd, Ra, Rb,  7, 14, Rc2)
	ROUND2(Rb, Rc, Rd, Ra, 12, 20, Rc3)

	// Round 3
	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND3(Ra, Rb, Rc, Rd,  5,  4, Rc0)
	ROUND3(Rd, Ra, Rb, Rc,  8, 11, Rc1)
	ROUND3(Rc, Rd, Ra, Rb, 11, 16, Rc2)
	ROUND3(Rb, Rc, Rd, Ra, 14, 23, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND3(Ra, Rb, Rc, Rd,  1,  4, Rc0)
	ROUND3(Rd, Ra, Rb, Rc,  4, 11, Rc1)
	ROUND3(Rc, Rd, Ra, Rb,  7, 16, Rc2)
	ROUND3(Rb, Rc, Rd, Ra, 10, 23, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND3(Ra, Rb, Rc, Rd, 13,  4, Rc0)
	ROUND3(Rd, Ra, Rb, Rc,  0, 11, Rc1)
	ROUND3(Rc, Rd, Ra, Rb,  3, 16, Rc2)
	ROUND3(Rb, Rc, Rd, Ra,  6, 23, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND3(Ra, Rb, Rc, Rd,  9,  4, Rc0)
	ROUND3(Rd, Ra, Rb, Rc, 12, 11, Rc1)
	ROUND3(Rc, Rd, Ra, Rb, 15, 16, Rc2)
	ROUND3(Rb, Rc, Rd, Ra,  2, 23, Rc3)

	// Round 4
	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND4(Ra, Rb, Rc, Rd,  0,  6, Rc0)
	ROUND4(Rd, Ra, Rb, Rc,  7, 10, Rc1)
	ROUND4(Rc, Rd, Ra, Rb, 14, 15, Rc2)
	ROUND4(Rb, Rc, Rd, Ra,  5, 21, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND4(Ra, Rb, Rc, Rd, 12,  6, Rc0)
	ROUND4(Rd, Ra, Rb, Rc,  3, 10, Rc1)
	ROUND4(Rc, Rd, Ra, Rb, 10, 15, Rc2)
	ROUND4(Rb, Rc, Rd, Ra,  1, 21, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND4(Ra, Rb, Rc, Rd,  8,  6, Rc0)
	ROUND4(Rd, Ra, Rb, Rc, 15, 10, Rc1)
	ROUND4(Rc, Rd, Ra, Rb,  6, 15, Rc2)
	ROUND4(Rb, Rc, Rd, Ra, 13, 21, Rc3)

	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
	ROUND4(Ra, Rb, Rc, Rd,  4,  6, Rc0)
	ROUND4(Rd, Ra, Rb, Rc, 11, 10, Rc1)
	ROUND4(Rc, Rd, Ra, Rb,  2, 15, Rc2)
	ROUND4(Rb, Rc, Rd, Ra,  9, 21, Rc3)

	// Add the accumulator values from before this block into the
	// result and store the sum back to dig.
	MOVW	dig+0(FP), Rt0
	MOVM.IA	(Rt0), [Rc0,Rc1,Rc2,Rc3]

	ADD	Rc0, Ra
	ADD	Rc1, Rb
	ADD	Rc2, Rc
	ADD	Rc3, Rd

	MOVM.IA	[Ra,Rb,Rc,Rd], (Rt0)

	// Advance from the saved source pointer (Rdata may point at buf)
	// and continue while it is still below p_end.
	MOVW	p_data, Rdata
	MOVW	p_end, Rt0
	ADD	$64, Rdata
	CMP	Rt0, Rdata
	BLO	loop

done:
	RET

// MD5 constants table

	// Round 1
	DATA	·table+0x00(SB)/4, $0xd76aa478
	DATA	·table+0x04(SB)/4, $0xe8c7b756
	DATA	·table+0x08(SB)/4, $0x242070db
	DATA	·table+0x0c(SB)/4, $0xc1bdceee
	DATA	·table+0x10(SB)/4, $0xf57c0faf
	DATA	·table+0x14(SB)/4, $0x4787c62a
	DATA	·table+0x18(SB)/4, $0xa8304613
	DATA	·table+0x1c(SB)/4, $0xfd469501
	DATA	·table+0x20(SB)/4, $0x698098d8
	DATA	·table+0x24(SB)/4, $0x8b44f7af
	DATA	·table+0x28(SB)/4, $0xffff5bb1
	DATA	·table+0x2c(SB)/4, $0x895cd7be
	DATA	·table+0x30(SB)/4, $0x6b901122
	DATA	·table+0x34(SB)/4, $0xfd987193
	DATA	·table+0x38(SB)/4, $0xa679438e
	DATA	·table+0x3c(SB)/4, $0x49b40821
	// Round 2
	DATA	·table+0x40(SB)/4, $0xf61e2562
	DATA	·table+0x44(SB)/4, $0xc040b340
	DATA	·table+0x48(SB)/4, $0x265e5a51
	DATA	·table+0x4c(SB)/4, $0xe9b6c7aa
	DATA	·table+0x50(SB)/4, $0xd62f105d
	DATA	·table+0x54(SB)/4, $0x02441453
	DATA	·table+0x58(SB)/4, $0xd8a1e681
	DATA	·table+0x5c(SB)/4, $0xe7d3fbc8
	DATA	·table+0x60(SB)/4, $0x21e1cde6
	DATA	·table+0x64(SB)/4, $0xc33707d6
	DATA	·table+0x68(SB)/4, $0xf4d50d87
	DATA	·table+0x6c(SB)/4, $0x455a14ed
	DATA	·table+0x70(SB)/4, $0xa9e3e905
	DATA	·table+0x74(SB)/4, $0xfcefa3f8
	DATA	·table+0x78(SB)/4, $0x676f02d9
	DATA	·table+0x7c(SB)/4, $0x8d2a4c8a
	// Round 3
	DATA	·table+0x80(SB)/4, $0xfffa3942
	DATA	·table+0x84(SB)/4, $0x8771f681
	DATA	·table+0x88(SB)/4, $0x6d9d6122
	DATA	·table+0x8c(SB)/4, $0xfde5380c
	DATA	·table+0x90(SB)/4, $0xa4beea44
	DATA	·table+0x94(SB)/4, $0x4bdecfa9
	DATA	·table+0x98(SB)/4, $0xf6bb4b60
	DATA	·table+0x9c(SB)/4, $0xbebfbc70
	DATA	·table+0xa0(SB)/4, $0x289b7ec6
	DATA	·table+0xa4(SB)/4, $0xeaa127fa
	DATA	·table+0xa8(SB)/4, $0xd4ef3085
	DATA	·table+0xac(SB)/4, $0x04881d05
	DATA	·table+0xb0(SB)/4, $0xd9d4d039
	DATA	·table+0xb4(SB)/4, $0xe6db99e5
	DATA	·table+0xb8(SB)/4, $0x1fa27cf8
	DATA	·table+0xbc(SB)/4, $0xc4ac5665
	// Round 4
	DATA	·table+0xc0(SB)/4, $0xf4292244
	DATA	·table+0xc4(SB)/4, $0x432aff97
	DATA	·table+0xc8(SB)/4, $0xab9423a7
	DATA	·table+0xcc(SB)/4, $0xfc93a039
	DATA	·table+0xd0(SB)/4, $0x655b59c3
	DATA	·table+0xd4(SB)/4, $0x8f0ccc92
	DATA	·table+0xd8(SB)/4, $0xffeff47d
	DATA	·table+0xdc(SB)/4, $0x85845dd1
	DATA	·table+0xe0(SB)/4, $0x6fa87e4f
	DATA	·table+0xe4(SB)/4, $0xfe2ce6e0
	DATA	·table+0xe8(SB)/4, $0xa3014314
	DATA	·table+0xec(SB)/4, $0x4e0811a1
	DATA	·table+0xf0(SB)/4, $0xf7537e82
	DATA	·table+0xf4(SB)/4, $0xbd3af235
	DATA	·table+0xf8(SB)/4, $0x2ad7d2bb
	DATA	·table+0xfc(SB)/4, $0xeb86d391
	// Global definition: 64 words, read-only
	GLOBL	·table(SB), RODATA, $256