github.com/gidoBOSSftw5731/go/src@v0.0.0-20210226122457-d24b0edbf019/crypto/md5/md5block_ppc64x.s

// Original source:
// http://www.zorinaq.com/papers/md5-amd64.html
// http://www.zorinaq.com/papers/md5-amd64.tar.bz2
//
// MD5 optimized for ppc64le using Go's assembler for
// ppc64le, based on md5block_amd64.s implementation by
// the Go authors.
//
// Author: Marc Bevand <bevand_m (at) epita.fr>
// Licence: I hereby disclaim the copyright on this code and place it
// in the public domain.

// +build ppc64 ppc64le

#include "textflag.h"

// ENDIAN_MOVE generates the appropriate
// 4 byte load for big or little endian.
// The 4 bytes at ptr+off are loaded into dst.
// The idx reg is only needed for big endian
// and is clobbered when used.
#ifdef GOARCH_ppc64le
#define ENDIAN_MOVE(off, ptr, dst, idx) \
	MOVWZ	off(ptr),dst
#else
#define ENDIAN_MOVE(off, ptr, dst, idx) \
	MOVD	$off,idx; \
	MOVWBR	(idx)(ptr), dst
#endif

TEXT ·block(SB),NOSPLIT,$0-32
	MOVD	dig+0(FP), R10	// pointer to the four uint32 state words
	MOVD	p+8(FP), R6	// message pointer
	MOVD	p_len+16(FP), R5	// message length
	SLD	$6, R5		// round the length down to a multiple of 64
	SRD	$6, R5
	ADD	R6, R5, R7	// R7 = end of the last complete block

	MOVWZ	0(R10), R22	// a
	MOVWZ	4(R10), R3	// b
	MOVWZ	8(R10), R4	// c
	MOVWZ	12(R10), R5	// d
	CMP	R6, R7
	BEQ	end

loop:
	// Save the incoming state so it can be added back in after the 64 steps.
	MOVWZ	R22, R14
	MOVWZ	R3, R15
	MOVWZ	R4, R16
	MOVWZ	R5, R17

	ENDIAN_MOVE(0,R6,R8,R21)
	MOVWZ	R5, R9

// Round 1 step: F(b,c,d) = (b AND c) OR ((NOT b) AND d),
// folded here as ((c XOR d) AND b) XOR d. R9 enters holding d and
// leaves holding c (the next step's d); R8 holds the current message
// word and is refilled with the word for the following step. The
// trailing MOVWZ a, a zero-extends a back to 32 bits after the
// 64-bit adds.
#define ROUND1(a, b, c, d, index, const, shift) \
	XOR	c, R9; \
	ADD	$const, a; \
	ADD	R8, a; \
	AND	b, R9; \
	XOR	d, R9; \
	ENDIAN_MOVE(index*4,R6,R8,R21); \
	ADD	R9, a; \
	RLWMI	$shift, a, $0xffffffff, a; \
	MOVWZ	c, R9; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND1(R22,R3,R4,R5, 1,0xd76aa478, 7);
	ROUND1(R5,R22,R3,R4, 2,0xe8c7b756,12);
	ROUND1(R4,R5,R22,R3, 3,0x242070db,17);
	ROUND1(R3,R4,R5,R22, 4,0xc1bdceee,22);
	ROUND1(R22,R3,R4,R5, 5,0xf57c0faf, 7);
	ROUND1(R5,R22,R3,R4, 6,0x4787c62a,12);
	ROUND1(R4,R5,R22,R3, 7,0xa8304613,17);
	ROUND1(R3,R4,R5,R22, 8,0xfd469501,22);
	ROUND1(R22,R3,R4,R5, 9,0x698098d8, 7);
	ROUND1(R5,R22,R3,R4,10,0x8b44f7af,12);
	ROUND1(R4,R5,R22,R3,11,0xffff5bb1,17);
	ROUND1(R3,R4,R5,R22,12,0x895cd7be,22);
	ROUND1(R22,R3,R4,R5,13,0x6b901122, 7);
	ROUND1(R5,R22,R3,R4,14,0xfd987193,12);
	ROUND1(R4,R5,R22,R3,15,0xa679438e,17);
	ROUND1(R3,R4,R5,R22, 0,0x49b40821,22);

	ENDIAN_MOVE(1*4,R6,R8,R21)
	MOVWZ	R5, R9
	MOVWZ	R5, R10

// Round 2 step: G(b,c,d) = (d AND b) OR ((NOT d) AND c).
// R9 and R10 both enter holding d and leave holding c.
#define ROUND2(a, b, c, d, index, const, shift) \
	XOR	$0xffffffff, R9; \ // NOTW R9
	ADD	$const, a; \
	ADD	R8, a; \
	AND	b, R10; \
	AND	c, R9; \
	ENDIAN_MOVE(index*4,R6,R8,R21); \
	OR	R9, R10; \
	MOVWZ	c, R9; \
	ADD	R10, a; \
	MOVWZ	c, R10; \
	RLWMI	$shift, a, $0xffffffff, a; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND2(R22,R3,R4,R5, 6,0xf61e2562, 5);
	ROUND2(R5,R22,R3,R4,11,0xc040b340, 9);
	ROUND2(R4,R5,R22,R3, 0,0x265e5a51,14);
	ROUND2(R3,R4,R5,R22, 5,0xe9b6c7aa,20);
	ROUND2(R22,R3,R4,R5,10,0xd62f105d, 5);
	ROUND2(R5,R22,R3,R4,15, 0x2441453, 9);
	ROUND2(R4,R5,R22,R3, 4,0xd8a1e681,14);
	ROUND2(R3,R4,R5,R22, 9,0xe7d3fbc8,20);
	ROUND2(R22,R3,R4,R5,14,0x21e1cde6, 5);
	ROUND2(R5,R22,R3,R4, 3,0xc33707d6, 9);
	ROUND2(R4,R5,R22,R3, 8,0xf4d50d87,14);
	ROUND2(R3,R4,R5,R22,13,0x455a14ed,20);
	ROUND2(R22,R3,R4,R5, 2,0xa9e3e905, 5);
	ROUND2(R5,R22,R3,R4, 7,0xfcefa3f8, 9);
	ROUND2(R4,R5,R22,R3,12,0x676f02d9,14);
	ROUND2(R3,R4,R5,R22, 0,0x8d2a4c8a,20);

	ENDIAN_MOVE(5*4,R6,R8,R21)
	MOVWZ	R4, R9

// Round 3 step: H(b,c,d) = b XOR c XOR d.
// R9 enters holding c and leaves holding b (the next step's c).
#define ROUND3(a, b, c, d, index, const, shift) \
	ADD	$const, a; \
	ADD	R8, a; \
	ENDIAN_MOVE(index*4,R6,R8,R21); \
	XOR	d, R9; \
	XOR	b, R9; \
	ADD	R9, a; \
	RLWMI	$shift, a, $0xffffffff, a; \
	MOVWZ	b, R9; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND3(R22,R3,R4,R5, 8,0xfffa3942, 4);
	ROUND3(R5,R22,R3,R4,11,0x8771f681,11);
	ROUND3(R4,R5,R22,R3,14,0x6d9d6122,16);
	ROUND3(R3,R4,R5,R22, 1,0xfde5380c,23);
	ROUND3(R22,R3,R4,R5, 4,0xa4beea44, 4);
	ROUND3(R5,R22,R3,R4, 7,0x4bdecfa9,11);
	ROUND3(R4,R5,R22,R3,10,0xf6bb4b60,16);
	ROUND3(R3,R4,R5,R22,13,0xbebfbc70,23);
	ROUND3(R22,R3,R4,R5, 0,0x289b7ec6, 4);
	ROUND3(R5,R22,R3,R4, 3,0xeaa127fa,11);
	ROUND3(R4,R5,R22,R3, 6,0xd4ef3085,16);
	ROUND3(R3,R4,R5,R22, 9, 0x4881d05,23);
	ROUND3(R22,R3,R4,R5,12,0xd9d4d039, 4);
	ROUND3(R5,R22,R3,R4,15,0xe6db99e5,11);
	ROUND3(R4,R5,R22,R3, 2,0x1fa27cf8,16);
	ROUND3(R3,R4,R5,R22, 0,0xc4ac5665,23);

	ENDIAN_MOVE(0,R6,R8,R21)
	MOVWZ	$0xffffffff, R9
	XOR	R5, R9

// Round 4 step: I(b,c,d) = c XOR (b OR (NOT d)).
// R9 enters holding NOT d and leaves holding NOT c (the next step's NOT d).
#define ROUND4(a, b, c, d, index, const, shift) \
	ADD	$const, a; \
	ADD	R8, a; \
	OR	b, R9; \
	XOR	c, R9; \
	ADD	R9, a; \
	ENDIAN_MOVE(index*4,R6,R8,R21); \
	MOVWZ	$0xffffffff, R9; \
	RLWMI	$shift, a, $0xffffffff, a; \
	XOR	c, R9; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND4(R22,R3,R4,R5, 7,0xf4292244, 6);
	ROUND4(R5,R22,R3,R4,14,0x432aff97,10);
	ROUND4(R4,R5,R22,R3, 5,0xab9423a7,15);
	ROUND4(R3,R4,R5,R22,12,0xfc93a039,21);
	ROUND4(R22,R3,R4,R5, 3,0x655b59c3, 6);
	ROUND4(R5,R22,R3,R4,10,0x8f0ccc92,10);
	ROUND4(R4,R5,R22,R3, 1,0xffeff47d,15);
	ROUND4(R3,R4,R5,R22, 8,0x85845dd1,21);
	ROUND4(R22,R3,R4,R5,15,0x6fa87e4f, 6);
	ROUND4(R5,R22,R3,R4, 6,0xfe2ce6e0,10);
	ROUND4(R4,R5,R22,R3,13,0xa3014314,15);
	ROUND4(R3,R4,R5,R22, 4,0x4e0811a1,21);
	ROUND4(R22,R3,R4,R5,11,0xf7537e82, 6);
	ROUND4(R5,R22,R3,R4, 2,0xbd3af235,10);
	ROUND4(R4,R5,R22,R3, 9,0x2ad7d2bb,15);
	ROUND4(R3,R4,R5,R22, 0,0xeb86d391,21);

	// Fold this block's result back into the saved state and advance
	// to the next 64-byte block.
	ADD	R14, R22
	ADD	R15, R3
	ADD	R16, R4
	ADD	R17, R5
	ADD	$64, R6
	CMP	R6, R7
	BLT	loop

end:
	MOVD	dig+0(FP), R10
	MOVWZ	R22, 0(R10)
	MOVWZ	R3, 4(R10)
	MOVWZ	R4, 8(R10)
	MOVWZ	R5, 12(R10)
	RET
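For orientation when reading the `TEXT ·block(SB),NOSPLIT,$0-32` header: the frame declares no locals and 32 bytes of arguments, and the `dig+0(FP)`, `p+8(FP)`, `p_len+16(FP)` loads match a pointer followed by a slice header. Below is a minimal sketch of the Go-side stub that an assembly body like this is paired with in package md5; the file wording and comments are assumptions made for illustration, only the signature is dictated by the frame layout, and `digest` refers to crypto/md5's internal state struct.

```go
// +build ppc64 ppc64le

package md5

// block applies the MD5 compression function to as many complete
// 64-byte blocks of p as possible, updating the four uint32 state
// words that dig points to. The 32-byte argument frame seen by the
// assembly is dig (8 bytes) plus the slice header of p (pointer,
// len, cap = 24 bytes), which is why the TEXT line says $0-32.
// The //go:noescape directive promises the assembly does not retain
// the slice, so callers' buffers need not escape to the heap.
//go:noescape
func block(dig *digest, p []byte)
```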