github.com/mattn/go@v0.0.0-20171011075504-07f7db3ea99f/src/crypto/md5/md5block_ppc64le.s

// Original source:
// http://www.zorinaq.com/papers/md5-amd64.html
// http://www.zorinaq.com/papers/md5-amd64.tar.bz2
//
// MD5 optimized for ppc64le using Go's assembler for
// ppc64le, based on md5block_amd64.s implementation by
// the Go authors.
//
// Author: Marc Bevand <bevand_m (at) epita.fr>
// Licence: I hereby disclaim the copyright on this code and place it
// in the public domain.

#include "textflag.h"

// TODO: Could be updated for ppc64 big endian
// by using the correct byte reverse instruction.
// Changes required in the Go assembler to make
// that instruction work.

#define MOVE_LITTLE_ENDIAN MOVWZ

// block(dig, p) processes the 64-byte blocks in p and updates the four
// 32-bit state words at dig. The state is kept in R22, R3, R4, R5 and
// saved across each block in R14-R17.
TEXT ·block(SB),NOSPLIT,$0-32
	MOVD	dig+0(FP), R10
	MOVD	p+8(FP), R6
	MOVD	p_len+16(FP), R5
	SLD	$6, R5
	SRD	$6, R5
	ADD	R6, R5, R7

	MOVWZ	0(R10), R22
	MOVWZ	4(R10), R3
	MOVWZ	8(R10), R4
	MOVWZ	12(R10), R5
	CMP	R6, R7
	BEQ	end

loop:
	MOVWZ	R22, R14
	MOVWZ	R3, R15
	MOVWZ	R4, R16
	MOVWZ	R5, R17

	MOVE_LITTLE_ENDIAN	0(R6), R8
	MOVWZ	R5, R9

// ROUND1: a = b + ((a + F(b,c,d) + x + const) <<< shift), with
// F(b,c,d) = d ^ (b & (c ^ d)). R8 holds the current message word x;
// the word at index is prefetched into R8 for the next step.
#define ROUND1(a, b, c, d, index, const, shift) \
	XOR	c, R9; \
	ADD	$const, a; \
	ADD	R8, a; \
	AND	b, R9; \
	XOR	d, R9; \
	MOVE_LITTLE_ENDIAN	(index*4)(R6), R8; \
	ADD	R9, a; \
	RLWMI	$shift, a, $0xffffffff, a; \
	MOVWZ	c, R9; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND1(R22,R3,R4,R5, 1,0xd76aa478, 7);
	ROUND1(R5,R22,R3,R4, 2,0xe8c7b756,12);
	ROUND1(R4,R5,R22,R3, 3,0x242070db,17);
	ROUND1(R3,R4,R5,R22, 4,0xc1bdceee,22);
	ROUND1(R22,R3,R4,R5, 5,0xf57c0faf, 7);
	ROUND1(R5,R22,R3,R4, 6,0x4787c62a,12);
	ROUND1(R4,R5,R22,R3, 7,0xa8304613,17);
	ROUND1(R3,R4,R5,R22, 8,0xfd469501,22);
	ROUND1(R22,R3,R4,R5, 9,0x698098d8, 7);
	ROUND1(R5,R22,R3,R4,10,0x8b44f7af,12);
	ROUND1(R4,R5,R22,R3,11,0xffff5bb1,17);
	ROUND1(R3,R4,R5,R22,12,0x895cd7be,22);
	ROUND1(R22,R3,R4,R5,13,0x6b901122, 7);
	ROUND1(R5,R22,R3,R4,14,0xfd987193,12);
	ROUND1(R4,R5,R22,R3,15,0xa679438e,17);
	ROUND1(R3,R4,R5,R22, 0,0x49b40821,22);

	MOVE_LITTLE_ENDIAN	(1*4)(R6), R8
	MOVWZ	R5, R9
	MOVWZ	R5, R10

// ROUND2: as ROUND1, but with G(b,c,d) = (d & b) | (~d & c).
#define ROUND2(a, b, c, d, index, const, shift) \
	XOR	$0xffffffff, R9; \ // NOTW R9
	ADD	$const, a; \
	ADD	R8, a; \
	AND	b, R10; \
	AND	c, R9; \
	MOVE_LITTLE_ENDIAN	(index*4)(R6), R8; \
	OR	R9, R10; \
	MOVWZ	c, R9; \
	ADD	R10, a; \
	MOVWZ	c, R10; \
	RLWMI	$shift, a, $0xffffffff, a; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND2(R22,R3,R4,R5, 6,0xf61e2562, 5);
	ROUND2(R5,R22,R3,R4,11,0xc040b340, 9);
	ROUND2(R4,R5,R22,R3, 0,0x265e5a51,14);
	ROUND2(R3,R4,R5,R22, 5,0xe9b6c7aa,20);
	ROUND2(R22,R3,R4,R5,10,0xd62f105d, 5);
	ROUND2(R5,R22,R3,R4,15, 0x2441453, 9);
	ROUND2(R4,R5,R22,R3, 4,0xd8a1e681,14);
	ROUND2(R3,R4,R5,R22, 9,0xe7d3fbc8,20);
	ROUND2(R22,R3,R4,R5,14,0x21e1cde6, 5);
	ROUND2(R5,R22,R3,R4, 3,0xc33707d6, 9);
	ROUND2(R4,R5,R22,R3, 8,0xf4d50d87,14);
	ROUND2(R3,R4,R5,R22,13,0x455a14ed,20);
	ROUND2(R22,R3,R4,R5, 2,0xa9e3e905, 5);
	ROUND2(R5,R22,R3,R4, 7,0xfcefa3f8, 9);
	ROUND2(R4,R5,R22,R3,12,0x676f02d9,14);
	ROUND2(R3,R4,R5,R22, 0,0x8d2a4c8a,20);

	MOVE_LITTLE_ENDIAN	(5*4)(R6), R8
	MOVWZ	R4, R9

// ROUND3: as ROUND1, but with H(b,c,d) = b ^ c ^ d.
#define ROUND3(a, b, c, d, index, const, shift) \
	ADD	$const, a; \
	ADD	R8, a; \
	MOVE_LITTLE_ENDIAN	(index*4)(R6), R8; \
	XOR	d, R9; \
	XOR	b, R9; \
	ADD	R9, a; \
	RLWMI	$shift, a, $0xffffffff, a; \
	MOVWZ	b, R9; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND3(R22,R3,R4,R5, 8,0xfffa3942, 4);
	ROUND3(R5,R22,R3,R4,11,0x8771f681,11);
	ROUND3(R4,R5,R22,R3,14,0x6d9d6122,16);
	ROUND3(R3,R4,R5,R22, 1,0xfde5380c,23);
	ROUND3(R22,R3,R4,R5, 4,0xa4beea44, 4);
	ROUND3(R5,R22,R3,R4, 7,0x4bdecfa9,11);
	ROUND3(R4,R5,R22,R3,10,0xf6bb4b60,16);
	ROUND3(R3,R4,R5,R22,13,0xbebfbc70,23);
	ROUND3(R22,R3,R4,R5, 0,0x289b7ec6, 4);
	ROUND3(R5,R22,R3,R4, 3,0xeaa127fa,11);
	ROUND3(R4,R5,R22,R3, 6,0xd4ef3085,16);
	ROUND3(R3,R4,R5,R22, 9, 0x4881d05,23);
	ROUND3(R22,R3,R4,R5,12,0xd9d4d039, 4);
	ROUND3(R5,R22,R3,R4,15,0xe6db99e5,11);
	ROUND3(R4,R5,R22,R3, 2,0x1fa27cf8,16);
	ROUND3(R3,R4,R5,R22, 0,0xc4ac5665,23);

	MOVE_LITTLE_ENDIAN	(0*4)(R6), R8
	MOVWZ	$0xffffffff, R9
	XOR	R5, R9

// ROUND4: as ROUND1, but with I(b,c,d) = c ^ (b | ~d).
#define ROUND4(a, b, c, d, index, const, shift) \
	ADD	$const, a; \
	ADD	R8, a; \
	OR	b, R9; \
	XOR	c, R9; \
	ADD	R9, a; \
	MOVE_LITTLE_ENDIAN	(index*4)(R6), R8; \
	MOVWZ	$0xffffffff, R9; \
	RLWMI	$shift, a, $0xffffffff, a; \
	XOR	c, R9; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND4(R22,R3,R4,R5, 7,0xf4292244, 6);
	ROUND4(R5,R22,R3,R4,14,0x432aff97,10);
	ROUND4(R4,R5,R22,R3, 5,0xab9423a7,15);
	ROUND4(R3,R4,R5,R22,12,0xfc93a039,21);
	ROUND4(R22,R3,R4,R5, 3,0x655b59c3, 6);
	ROUND4(R5,R22,R3,R4,10,0x8f0ccc92,10);
	ROUND4(R4,R5,R22,R3, 1,0xffeff47d,15);
	ROUND4(R3,R4,R5,R22, 8,0x85845dd1,21);
	ROUND4(R22,R3,R4,R5,15,0x6fa87e4f, 6);
	ROUND4(R5,R22,R3,R4, 6,0xfe2ce6e0,10);
	ROUND4(R4,R5,R22,R3,13,0xa3014314,15);
	ROUND4(R3,R4,R5,R22, 4,0x4e0811a1,21);
	ROUND4(R22,R3,R4,R5,11,0xf7537e82, 6);
	ROUND4(R5,R22,R3,R4, 2,0xbd3af235,10);
	ROUND4(R4,R5,R22,R3, 9,0x2ad7d2bb,15);
	ROUND4(R3,R4,R5,R22, 0,0xeb86d391,21);

	// Add the saved state back in and advance to the next 64-byte block.
	ADD	R14, R22
	ADD	R15, R3
	ADD	R16, R4
	ADD	R17, R5
	ADD	$64, R6
	CMP	R6, R7
	BLT	loop

end:
	// Write the updated state back to dig.
	MOVD	dig+0(FP), R10
	MOVWZ	R22, 0(R10)
	MOVWZ	R3, 4(R10)
	MOVWZ	R4, 8(R10)
	MOVWZ	R5, 12(R10)
	RET
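For reference, each ROUND macro computes one MD5 step of the form a = b + ((a + f(b,c,d) + x + const) <<< shift), differing only in which auxiliary function f it uses and in how R9/R10 are prepared for the next step. The sketch below restates that computation in plain Go; the names fF, fG, fH, fI and step are illustrative and do not appear in this file or in the crypto/md5 package.

// Illustrative sketch, not part of this file or of crypto/md5: the per-step
// computation that the ROUND1..ROUND4 macros implement, written in plain Go.
package main

import (
	"fmt"
	"math/bits"
)

// The four MD5 auxiliary functions (RFC 1321), in the forms the macros use.
func fF(b, c, d uint32) uint32 { return d ^ (b & (c ^ d)) }  // round 1
func fG(b, c, d uint32) uint32 { return (d & b) | (^d & c) } // round 2
func fH(b, c, d uint32) uint32 { return b ^ c ^ d }          // round 3
func fI(b, c, d uint32) uint32 { return c ^ (b | ^d) }       // round 4

// step is one MD5 step: a = b + ((a + fn(b,c,d) + x + k) <<< s).
// In the assembly, RLWMI performs the 32-bit rotate.
func step(fn func(b, c, d uint32) uint32, a, b, c, d, x, k uint32, s int) uint32 {
	return b + bits.RotateLeft32(a+fn(b, c, d)+x+k, s)
}

func main() {
	// First step of round 1 on the standard MD5 initial state, with a
	// message word of zero; the constant and shift match ROUND1's first
	// invocation above.
	a, b, c, d := uint32(0x67452301), uint32(0xefcdab89), uint32(0x98badcfe), uint32(0x10325476)
	a = step(fF, a, b, c, d, 0, 0xd76aa478, 7)
	fmt.Printf("a = %#08x\n", a)
}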