github.com/mattn/go@v0.0.0-20171011075504-07f7db3ea99f/src/crypto/md5/md5block_ppc64le.s

// Original source:
//	http://www.zorinaq.com/papers/md5-amd64.html
//	http://www.zorinaq.com/papers/md5-amd64.tar.bz2
//
// MD5 optimized for ppc64le using Go's assembler for
// ppc64le, based on the md5block_amd64.s implementation by
// the Go authors.
//
// Author: Marc Bevand <bevand_m (at) epita.fr>
// Licence: I hereby disclaim the copyright on this code and place it
// in the public domain.

#include "textflag.h"

// TODO: Could be updated for ppc64 big endian
// by using the correct byte-reverse instruction.
// Changes are required in the Go assembler to make
// that instruction work.

#define MOVE_LITTLE_ENDIAN MOVWZ
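// Register usage below: R22, R3, R4, R5 hold the state words a, b, c, d;
// R14-R17 keep a copy of the incoming state for the final feed-forward
// adds; R8 holds the current (pre-loaded) message word; R9 and R10 are
// scratch for the round boolean functions; R6 walks the input and R7
// marks its end.  R10 is also the digest pointer on entry, so dig is
// reloaded from the frame before the results are stored at the end.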
TEXT ·block(SB),NOSPLIT,$0-32
	MOVD	dig+0(FP), R10
	MOVD	p+8(FP), R6
	MOVD	p_len+16(FP), R5
	SLD	$6, R5
	SRD	$6, R5
	ADD	R6, R5, R7
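// R6 = p and R7 = p + p_len mark the input to process.  The SLD/SRD
// pair clears the top six bits of p_len; the caller passes a whole
// number of 64-byte MD5 blocks, so R7 lands exactly at the end of the
// last block.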

	MOVWZ	0(R10), R22
	MOVWZ	4(R10), R3
	MOVWZ	8(R10), R4
	MOVWZ	12(R10), R5
	CMP	R6, R7
	BEQ	end
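// Main loop: each iteration processes one 64-byte block.  The incoming
// state is saved in R14-R17 so it can be added back in after the four
// rounds.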
loop:
	MOVWZ	R22, R14
	MOVWZ	R3, R15
	MOVWZ	R4, R16
	MOVWZ	R5, R17
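// Pre-load the first message word into R8 and seed R9 with d for the
// first ROUND1 step.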
	MOVE_LITTLE_ENDIAN	0(R6), R8
	MOVWZ	R5, R9
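// ROUND1 is one step of MD5 round 1:
//	a = b + rotl32(a + F(b,c,d) + X[k] + const, shift)
// where F(b,c,d) = (b AND c) OR (NOT b AND d), computed here as
// ((c XOR d) AND b) XOR d.  R9 enters each step holding d and leaves
// holding c, the next step's d.  R8 holds the current message word and
// the word for the following step is pre-loaded from index*4(R6).
// RLWMI with an all-ones mask rotates the low 32 bits of a left by
// shift, and the trailing MOVWZ re-zero-extends a.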
#define ROUND1(a, b, c, d, index, const, shift) \
	XOR	c, R9; \
	ADD	$const, a; \
	ADD	R8, a; \
	AND	b, R9; \
	XOR	d, R9; \
	MOVE_LITTLE_ENDIAN	(index*4)(R6), R8; \
	ADD	R9, a; \
	RLWMI	$shift, a, $0xffffffff, a; \
	MOVWZ	c, R9; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND1(R22,R3,R4,R5, 1,0xd76aa478, 7);
	ROUND1(R5,R22,R3,R4, 2,0xe8c7b756,12);
	ROUND1(R4,R5,R22,R3, 3,0x242070db,17);
	ROUND1(R3,R4,R5,R22, 4,0xc1bdceee,22);
	ROUND1(R22,R3,R4,R5, 5,0xf57c0faf, 7);
	ROUND1(R5,R22,R3,R4, 6,0x4787c62a,12);
	ROUND1(R4,R5,R22,R3, 7,0xa8304613,17);
	ROUND1(R3,R4,R5,R22, 8,0xfd469501,22);
	ROUND1(R22,R3,R4,R5, 9,0x698098d8, 7);
	ROUND1(R5,R22,R3,R4,10,0x8b44f7af,12);
	ROUND1(R4,R5,R22,R3,11,0xffff5bb1,17);
	ROUND1(R3,R4,R5,R22,12,0x895cd7be,22);
	ROUND1(R22,R3,R4,R5,13,0x6b901122, 7);
	ROUND1(R5,R22,R3,R4,14,0xfd987193,12);
	ROUND1(R4,R5,R22,R3,15,0xa679438e,17);
	ROUND1(R3,R4,R5,R22, 0,0x49b40821,22);

	MOVE_LITTLE_ENDIAN	(1*4)(R6), R8
	MOVWZ	R5, R9
	MOVWZ	R5, R10
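// ROUND2 is one step of MD5 round 2:
//	a = b + rotl32(a + G(b,c,d) + X[k] + const, shift)
// where G(b,c,d) = (b AND d) OR (c AND NOT d).  R9 and R10 both enter
// each step holding d: R9 is complemented (the XOR with 0xffffffff
// acts as a 32-bit NOT) and ANDed with c, R10 is ANDed with b, and the
// OR of the two gives G.  Both are then reloaded with c, the next
// step's d.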
#define ROUND2(a, b, c, d, index, const, shift) \
	XOR	$0xffffffff, R9; \ // NOTW R9
	ADD	$const, a; \
	ADD	R8, a; \
	AND	b, R10; \
	AND	c, R9; \
	MOVE_LITTLE_ENDIAN	(index*4)(R6), R8; \
	OR	R9, R10; \
	MOVWZ	c, R9; \
	ADD	R10, a; \
	MOVWZ	c, R10; \
	RLWMI	$shift, a, $0xffffffff, a; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND2(R22,R3,R4,R5, 6,0xf61e2562, 5);
	ROUND2(R5,R22,R3,R4,11,0xc040b340, 9);
	ROUND2(R4,R5,R22,R3, 0,0x265e5a51,14);
	ROUND2(R3,R4,R5,R22, 5,0xe9b6c7aa,20);
	ROUND2(R22,R3,R4,R5,10,0xd62f105d, 5);
	ROUND2(R5,R22,R3,R4,15, 0x2441453, 9);
	ROUND2(R4,R5,R22,R3, 4,0xd8a1e681,14);
	ROUND2(R3,R4,R5,R22, 9,0xe7d3fbc8,20);
	ROUND2(R22,R3,R4,R5,14,0x21e1cde6, 5);
	ROUND2(R5,R22,R3,R4, 3,0xc33707d6, 9);
	ROUND2(R4,R5,R22,R3, 8,0xf4d50d87,14);
	ROUND2(R3,R4,R5,R22,13,0x455a14ed,20);
	ROUND2(R22,R3,R4,R5, 2,0xa9e3e905, 5);
	ROUND2(R5,R22,R3,R4, 7,0xfcefa3f8, 9);
	ROUND2(R4,R5,R22,R3,12,0x676f02d9,14);
	ROUND2(R3,R4,R5,R22, 0,0x8d2a4c8a,20);

	MOVE_LITTLE_ENDIAN	(5*4)(R6), R8
	MOVWZ	R4, R9
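// ROUND3 is one step of MD5 round 3:
//	a = b + rotl32(a + H(b,c,d) + X[k] + const, shift)
// where H(b,c,d) = b XOR c XOR d.  R9 enters each step holding c and
// leaves holding b, which is the next step's c.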
#define ROUND3(a, b, c, d, index, const, shift) \
	ADD	$const, a; \
	ADD	R8, a; \
	MOVE_LITTLE_ENDIAN	(index*4)(R6), R8; \
	XOR	d, R9; \
	XOR	b, R9; \
	ADD	R9, a; \
	RLWMI	$shift, a, $0xffffffff, a; \
	MOVWZ	b, R9; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND3(R22,R3,R4,R5, 8,0xfffa3942, 4);
	ROUND3(R5,R22,R3,R4,11,0x8771f681,11);
	ROUND3(R4,R5,R22,R3,14,0x6d9d6122,16);
	ROUND3(R3,R4,R5,R22, 1,0xfde5380c,23);
	ROUND3(R22,R3,R4,R5, 4,0xa4beea44, 4);
	ROUND3(R5,R22,R3,R4, 7,0x4bdecfa9,11);
	ROUND3(R4,R5,R22,R3,10,0xf6bb4b60,16);
	ROUND3(R3,R4,R5,R22,13,0xbebfbc70,23);
	ROUND3(R22,R3,R4,R5, 0,0x289b7ec6, 4);
	ROUND3(R5,R22,R3,R4, 3,0xeaa127fa,11);
	ROUND3(R4,R5,R22,R3, 6,0xd4ef3085,16);
	ROUND3(R3,R4,R5,R22, 9, 0x4881d05,23);
	ROUND3(R22,R3,R4,R5,12,0xd9d4d039, 4);
	ROUND3(R5,R22,R3,R4,15,0xe6db99e5,11);
	ROUND3(R4,R5,R22,R3, 2,0x1fa27cf8,16);
	ROUND3(R3,R4,R5,R22, 0,0xc4ac5665,23);

	MOVE_LITTLE_ENDIAN	(0*4)(R6), R8
	MOVWZ	$0xffffffff, R9
	XOR	R5, R9
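// ROUND4 is one step of MD5 round 4:
//	a = b + rotl32(a + I(b,c,d) + X[k] + const, shift)
// where I(b,c,d) = c XOR (b OR NOT d).  R9 enters each step holding
// NOT d and is rebuilt as NOT c, the next step's NOT d, before the
// step ends.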
#define ROUND4(a, b, c, d, index, const, shift) \
	ADD	$const, a; \
	ADD	R8, a; \
	OR	b, R9; \
	XOR	c, R9; \
	ADD	R9, a; \
	MOVE_LITTLE_ENDIAN	(index*4)(R6), R8; \
	MOVWZ	$0xffffffff, R9; \
	RLWMI	$shift, a, $0xffffffff, a; \
	XOR	c, R9; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND4(R22,R3,R4,R5, 7,0xf4292244, 6);
	ROUND4(R5,R22,R3,R4,14,0x432aff97,10);
	ROUND4(R4,R5,R22,R3, 5,0xab9423a7,15);
	ROUND4(R3,R4,R5,R22,12,0xfc93a039,21);
	ROUND4(R22,R3,R4,R5, 3,0x655b59c3, 6);
	ROUND4(R5,R22,R3,R4,10,0x8f0ccc92,10);
	ROUND4(R4,R5,R22,R3, 1,0xffeff47d,15);
	ROUND4(R3,R4,R5,R22, 8,0x85845dd1,21);
	ROUND4(R22,R3,R4,R5,15,0x6fa87e4f, 6);
	ROUND4(R5,R22,R3,R4, 6,0xfe2ce6e0,10);
	ROUND4(R4,R5,R22,R3,13,0xa3014314,15);
	ROUND4(R3,R4,R5,R22, 4,0x4e0811a1,21);
	ROUND4(R22,R3,R4,R5,11,0xf7537e82, 6);
	ROUND4(R5,R22,R3,R4, 2,0xbd3af235,10);
	ROUND4(R4,R5,R22,R3, 9,0x2ad7d2bb,15);
	ROUND4(R3,R4,R5,R22, 0,0xeb86d391,21);
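// Feed the saved state forward, advance to the next 64-byte block, and
// loop while input remains.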
	ADD	R14, R22
	ADD	R15, R3
	ADD	R16, R4
	ADD	R17, R5
	ADD	$64, R6
	CMP	R6, R7
	BLT	loop
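// Store the updated state back into the digest.  dig is reloaded here
// because R10 was used as scratch in round 2.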
end:
	MOVD	dig+0(FP), R10
	MOVWZ	R22, 0(R10)
	MOVWZ	R3, 4(R10)
	MOVWZ	R4, 8(R10)
	MOVWZ	R5, 12(R10)
	RET