github.com/c12o16h1/go/src@v0.0.0-20200114212001-5a151c0f00ed/crypto/md5/md5block_ppc64x.s

// Original source:
//	http://www.zorinaq.com/papers/md5-amd64.html
//	http://www.zorinaq.com/papers/md5-amd64.tar.bz2
//
// MD5 optimized for ppc64le using Go's assembler for
// ppc64le, based on the md5block_amd64.s implementation by
// the Go authors.
//
// Author: Marc Bevand <bevand_m (at) epita.fr>
// Licence: I hereby disclaim the copyright on this code and place it
// in the public domain.

// +build ppc64 ppc64le

#include "textflag.h"

// ENDIAN_MOVE generates the appropriate
// 4-byte load for big or little endian.
// The 4 bytes at ptr+off are loaded into dst.
// The idx reg is only needed for big endian
// and is clobbered when used.
#ifdef GOARCH_ppc64le
#define ENDIAN_MOVE(off, ptr, dst, idx) \
	MOVWZ	off(ptr),dst
#else
#define ENDIAN_MOVE(off, ptr, dst, idx) \
	MOVD	$off,idx; \
	MOVWBR	(idx)(ptr), dst
#endif
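
// For example, ENDIAN_MOVE(16, R6, R8, R21) loads message word
// X[4] (bytes 16..19 of the block at R6) into R8.  It expands to
//	MOVWZ	16(R6), R8
// on little endian and to
//	MOVD	$16, R21
//	MOVWBR	(R21)(R6), R8
// on big endian.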

TEXT ·block(SB),NOSPLIT,$0-32
	MOVD	dig+0(FP), R10
	MOVD	p+8(FP), R6
	MOVD	p_len+16(FP), R5
	SLD	$6, R5
	SRD	$6, R5
	ADD	R6, R5, R7

	MOVWZ	0(R10), R22
	MOVWZ	4(R10), R3
	MOVWZ	8(R10), R4
	MOVWZ	12(R10), R5
	CMP	R6, R7
	BEQ	end

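// R6 walks through the data in 64-byte blocks and R7 marks the
// end of the data.  R22, R3, R4, R5 hold the working MD5 state
// a, b, c, d; R14-R17 save the state at the top of each block so
// it can be added back in at the bottom of the loop.  R10 is
// reused as scratch inside the loop, so the digest pointer is
// reloaded at end.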
loop:
	MOVWZ	R22, R14
	MOVWZ	R3, R15
	MOVWZ	R4, R16
	MOVWZ	R5, R17

	ENDIAN_MOVE(0,R6,R8,R21)
	MOVWZ	R5, R9

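// ROUND1 is one step of MD5 round 1.  It uses
// F(b,c,d) = (b AND c) OR (NOT b AND d), computed in R9 as
// d XOR (b AND (c XOR d)), and performs
//	a = b + ((a + F(b,c,d) + X + const) <<< shift)
// where X is the message word already in R8 and const is the
// round constant T[i] from RFC 1321.  The word for the next
// step, X[index], is prefetched into R8.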
#define ROUND1(a, b, c, d, index, const, shift) \
	XOR	c, R9; \
	ADD	$const, a; \
	ADD	R8, a; \
	AND	b, R9; \
	XOR	d, R9; \
	ENDIAN_MOVE(index*4,R6,R8,R21); \
	ADD	R9, a; \
	RLWMI	$shift, a, $0xffffffff, a; \
	MOVWZ	c, R9; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND1(R22,R3,R4,R5, 1,0xd76aa478, 7);
	ROUND1(R5,R22,R3,R4, 2,0xe8c7b756,12);
	ROUND1(R4,R5,R22,R3, 3,0x242070db,17);
	ROUND1(R3,R4,R5,R22, 4,0xc1bdceee,22);
	ROUND1(R22,R3,R4,R5, 5,0xf57c0faf, 7);
	ROUND1(R5,R22,R3,R4, 6,0x4787c62a,12);
	ROUND1(R4,R5,R22,R3, 7,0xa8304613,17);
	ROUND1(R3,R4,R5,R22, 8,0xfd469501,22);
	ROUND1(R22,R3,R4,R5, 9,0x698098d8, 7);
	ROUND1(R5,R22,R3,R4,10,0x8b44f7af,12);
	ROUND1(R4,R5,R22,R3,11,0xffff5bb1,17);
	ROUND1(R3,R4,R5,R22,12,0x895cd7be,22);
	ROUND1(R22,R3,R4,R5,13,0x6b901122, 7);
	ROUND1(R5,R22,R3,R4,14,0xfd987193,12);
	ROUND1(R4,R5,R22,R3,15,0xa679438e,17);
	ROUND1(R3,R4,R5,R22, 0,0x49b40821,22);

	ENDIAN_MOVE(1*4,R6,R8,R21)
	MOVWZ	R5, R9
	MOVWZ	R5, R10

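// ROUND2 is one step of MD5 round 2.  It uses
// G(b,c,d) = (b AND d) OR (c AND NOT d), built in R10 from
// R9 = NOT d and R10 = d, and performs
//	a = b + ((a + G(b,c,d) + X + const) <<< shift)
// while prefetching the next message word X[index] into R8.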
#define ROUND2(a, b, c, d, index, const, shift) \
	XOR	$0xffffffff, R9; \ // NOTW R9
	ADD	$const, a; \
	ADD	R8, a; \
	AND	b, R10; \
	AND	c, R9; \
	ENDIAN_MOVE(index*4,R6,R8,R21); \
	OR	R9, R10; \
	MOVWZ	c, R9; \
	ADD	R10, a; \
	MOVWZ	c, R10; \
	RLWMI	$shift, a, $0xffffffff, a; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND2(R22,R3,R4,R5, 6,0xf61e2562, 5);
	ROUND2(R5,R22,R3,R4,11,0xc040b340, 9);
	ROUND2(R4,R5,R22,R3, 0,0x265e5a51,14);
	ROUND2(R3,R4,R5,R22, 5,0xe9b6c7aa,20);
	ROUND2(R22,R3,R4,R5,10,0xd62f105d, 5);
	ROUND2(R5,R22,R3,R4,15, 0x2441453, 9);
	ROUND2(R4,R5,R22,R3, 4,0xd8a1e681,14);
	ROUND2(R3,R4,R5,R22, 9,0xe7d3fbc8,20);
	ROUND2(R22,R3,R4,R5,14,0x21e1cde6, 5);
	ROUND2(R5,R22,R3,R4, 3,0xc33707d6, 9);
	ROUND2(R4,R5,R22,R3, 8,0xf4d50d87,14);
	ROUND2(R3,R4,R5,R22,13,0x455a14ed,20);
	ROUND2(R22,R3,R4,R5, 2,0xa9e3e905, 5);
	ROUND2(R5,R22,R3,R4, 7,0xfcefa3f8, 9);
	ROUND2(R4,R5,R22,R3,12,0x676f02d9,14);
	ROUND2(R3,R4,R5,R22, 0,0x8d2a4c8a,20);

	ENDIAN_MOVE(5*4,R6,R8,R21)
	MOVWZ	R4, R9

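// ROUND3 is one step of MD5 round 3.  It uses
// H(b,c,d) = b XOR c XOR d, accumulated in R9, and performs
//	a = b + ((a + H(b,c,d) + X + const) <<< shift)
// while prefetching the next message word X[index] into R8.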
#define ROUND3(a, b, c, d, index, const, shift) \
	ADD	$const, a; \
	ADD	R8, a; \
	ENDIAN_MOVE(index*4,R6,R8,R21); \
	XOR	d, R9; \
	XOR	b, R9; \
	ADD	R9, a; \
	RLWMI	$shift, a, $0xffffffff, a; \
	MOVWZ	b, R9; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND3(R22,R3,R4,R5, 8,0xfffa3942, 4);
	ROUND3(R5,R22,R3,R4,11,0x8771f681,11);
	ROUND3(R4,R5,R22,R3,14,0x6d9d6122,16);
	ROUND3(R3,R4,R5,R22, 1,0xfde5380c,23);
	ROUND3(R22,R3,R4,R5, 4,0xa4beea44, 4);
	ROUND3(R5,R22,R3,R4, 7,0x4bdecfa9,11);
	ROUND3(R4,R5,R22,R3,10,0xf6bb4b60,16);
	ROUND3(R3,R4,R5,R22,13,0xbebfbc70,23);
	ROUND3(R22,R3,R4,R5, 0,0x289b7ec6, 4);
	ROUND3(R5,R22,R3,R4, 3,0xeaa127fa,11);
	ROUND3(R4,R5,R22,R3, 6,0xd4ef3085,16);
	ROUND3(R3,R4,R5,R22, 9, 0x4881d05,23);
	ROUND3(R22,R3,R4,R5,12,0xd9d4d039, 4);
	ROUND3(R5,R22,R3,R4,15,0xe6db99e5,11);
	ROUND3(R4,R5,R22,R3, 2,0x1fa27cf8,16);
	ROUND3(R3,R4,R5,R22, 0,0xc4ac5665,23);

	ENDIAN_MOVE(0,R6,R8,R21)
	MOVWZ	$0xffffffff, R9
	XOR	R5, R9

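// ROUND4 is one step of MD5 round 4.  It uses
// I(b,c,d) = c XOR (b OR NOT d), with NOT d already in R9 on
// entry, and performs
//	a = b + ((a + I(b,c,d) + X + const) <<< shift)
// while prefetching the next message word X[index] into R8 and
// reloading R9 with NOT c for the following step.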
#define ROUND4(a, b, c, d, index, const, shift) \
	ADD	$const, a; \
	ADD	R8, a; \
	OR	b, R9; \
	XOR	c, R9; \
	ADD	R9, a; \
	ENDIAN_MOVE(index*4,R6,R8,R21); \
	MOVWZ	$0xffffffff, R9; \
	RLWMI	$shift, a, $0xffffffff, a; \
	XOR	c, R9; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND4(R22,R3,R4,R5, 7,0xf4292244, 6);
	ROUND4(R5,R22,R3,R4,14,0x432aff97,10);
	ROUND4(R4,R5,R22,R3, 5,0xab9423a7,15);
	ROUND4(R3,R4,R5,R22,12,0xfc93a039,21);
	ROUND4(R22,R3,R4,R5, 3,0x655b59c3, 6);
	ROUND4(R5,R22,R3,R4,10,0x8f0ccc92,10);
	ROUND4(R4,R5,R22,R3, 1,0xffeff47d,15);
	ROUND4(R3,R4,R5,R22, 8,0x85845dd1,21);
	ROUND4(R22,R3,R4,R5,15,0x6fa87e4f, 6);
	ROUND4(R5,R22,R3,R4, 6,0xfe2ce6e0,10);
	ROUND4(R4,R5,R22,R3,13,0xa3014314,15);
	ROUND4(R3,R4,R5,R22, 4,0x4e0811a1,21);
	ROUND4(R22,R3,R4,R5,11,0xf7537e82, 6);
	ROUND4(R5,R22,R3,R4, 2,0xbd3af235,10);
	ROUND4(R4,R5,R22,R3, 9,0x2ad7d2bb,15);
	ROUND4(R3,R4,R5,R22, 0,0xeb86d391,21);

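// Add this block's result to the saved chaining values and
// advance to the next 64-byte block.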
	ADD	R14, R22
	ADD	R15, R3
	ADD	R16, R4
	ADD	R17, R5
	ADD	$64, R6
	CMP	R6, R7
	BLT	loop

end:
	MOVD	dig+0(FP), R10
	MOVWZ	R22, 0(R10)
	MOVWZ	R3, 4(R10)
	MOVWZ	R4, 8(R10)
	MOVWZ	R5, 12(R10)
	RET