github.com/sandwichdev/go-internals@v0.0.0-20210605002614-12311ac6b2c5/bytealg/compare_ppc64x.s (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build ppc64 ppc64le
     6  
     7  #include "go_asm.h"
     8  #include "textflag.h"
     9  
    10  TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56	// compares a with b; -1, 0 or +1 is stored at ret+48(FP)
    11  	MOVD	a_base+0(FP), R5	// R5 = a addr
    12  	MOVD	b_base+24(FP), R6	// R6 = b addr
    13  	MOVD	a_len+8(FP), R3	// R3 = a len
    14  	CMP	R5,R6,CR7	// CR7 = a addr vs b addr
    15  	MOVD	b_len+32(FP), R4	// R4 = b len
    16  	MOVD	$ret+48(FP), R7	// R7 = addr of return value
    17  	CMP	R3,R4,CR6	// CR6 = a len vs b len
    18  	BEQ	CR7,equal	// same start address: the lengths alone decide
    19  	// Different addresses: branch (no return here) to the byte comparison, which stores the result through R7.
    20  #ifdef	GOARCH_ppc64le
    21  	BR	cmpbodyLE<>(SB)
    22  #else
    23  	BR      cmpbodyBE<>(SB)
    24  #endif
    25  
    26  equal:	// a and b start at the same address
    27  	BEQ	CR6,done	// lengths match as well: result is 0
    28  	MOVD	$1, R8	// start from +1 (a longer than b)
    29  	BGT	CR6,greater	// a len > b len: keep +1
    30  	NEG	R8	// a len < b len: result becomes -1
    31  
    32  greater:	// also reached by fallthrough with R8 = -1
    33  	MOVD	R8, (R7)	// store +1 or -1
    34  	RET
    35  
    36  done:
    37  	MOVD	$0, (R7)	// same address and same length
    38  	RET
    39  
    40  TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40	// compares a with b; -1, 0 or +1 is stored at ret+32(FP)
    41  	MOVD	a_base+0(FP), R5	// R5 = a addr
    42  	MOVD	b_base+16(FP), R6	// R6 = b addr
    43  	MOVD	a_len+8(FP), R3	// R3 = a len
    44  	CMP	R5,R6,CR7	// CR7 = a addr vs b addr
    45  	MOVD	b_len+24(FP), R4	// R4 = b len
    46  	MOVD	$ret+32(FP), R7	// R7 = addr of return value
    47  	CMP	R3,R4,CR6	// CR6 = a len vs b len
    48  	BEQ	CR7,equal	// same start address: the lengths alone decide
    49  	// Different addresses: branch (no return here) to the byte comparison, which stores the result through R7.
    50  #ifdef	GOARCH_ppc64le
    51  	BR	cmpbodyLE<>(SB)
    52  #else
    53  	BR      cmpbodyBE<>(SB)
    54  #endif
    55  
    56  equal:	// a and b start at the same address
    57  	BEQ	CR6,done	// lengths match as well: result is 0
    58  	MOVD	$1, R8	// start from +1 (a longer than b)
    59  	BGT	CR6,greater	// a len > b len: keep +1
    60  	NEG	R8	// a len < b len: result becomes -1
    61  
    62  greater:	// also reached by fallthrough with R8 = -1
    63  	MOVD	R8, (R7)	// store +1 or -1
    64  	RET
    65  
    66  done:
    67  	MOVD	$0, (R7)	// same address and same length
    68  	RET
    69  
    70  // Do an efficient memcmp for ppc64le: compare the first min(a len, b len) bytes; on a tie the lengths decide.
    71  // R3 = a len
    72  // R4 = b len
    73  // R5 = a addr (advanced as data is consumed)
    74  // R6 = b addr (advanced as data is consumed)
    75  // R7 = addr of return value (-1, 0 or +1 is stored there). Also written: R3, R8-R10, R14-R16, CR0, CR2, CTR.
    76  TEXT cmpbodyLE<>(SB),NOSPLIT|NOFRAME,$0-0
    77  	MOVD	R3,R8		// set up length
    78  	CMP	R3,R4,CR2	// CR2 = a len vs b len, kept for the final tie-break
    79  	BC	12,8,setuplen	// BLT CR2
    80  	MOVD	R4,R8		// use R4 for comparison len
    81  setuplen:			// R8 = min(a len, b len)
    82  	MOVD	R8,CTR		// set up loop counter
    83  	CMP	R8,$8		// only optimize >=8
    84  	BLT	simplecheck
    85  	DCBT	(R5)		// cache hint
    86  	DCBT	(R6)
    87  	CMP	R8,$32		// optimize >= 32
    88  	MOVD	R8,R9		// R9 = length, turned into a doubleword count at setup8a
    89  	BLT	setup8a		// 8 byte moves only
    90  setup32a:
    91  	SRADCC	$5,R8,R9	// number of 32 byte chunks
    92  	MOVD	R9,CTR		// CTR = 32 byte chunk count
    93  
    94          // Special processing for 32 bytes or longer.
    95          // Loading this way is faster and correct as long as the
    96  	// doublewords being compared are equal. Once they
    97  	// are found unequal, reload them in proper byte order
    98  	// to determine greater or less than (see cmpne; R16 holds the offset to reload).
    99  loop32a:
   100  	MOVD	0(R5),R9	// doublewords to compare
   101  	MOVD	0(R6),R10	// get 4 doublewords
   102  	MOVD	8(R5),R14
   103  	MOVD	8(R6),R15
   104  	CMPU	R9,R10		// bytes equal?
   105  	MOVD	$0,R16		// set up for cmpne: offset of the pair just compared
   106  	BNE	cmpne		// further compare for LT or GT
   107  	MOVD	16(R5),R9	// get next pair of doublewords
   108  	MOVD	16(R6),R10
   109  	CMPU	R14,R15		// bytes match?
   110  	MOVD	$8,R16		// set up for cmpne
   111  	BNE	cmpne		// further compare for LT or GT
   112  	MOVD	24(R5),R14	// get next pair of doublewords
   113  	MOVD    24(R6),R15
   114  	CMPU	R9,R10		// bytes match?
   115  	MOVD	$16,R16		// set up for cmpne
   116  	BNE	cmpne		// further compare for LT or GT
   117  	MOVD	$-8,R16		// for cmpne, R5,R6 already inc by 32
   118  	ADD	$32,R5		// bump up to next 32
   119  	ADD	$32,R6
   120  	CMPU    R14,R15		// bytes match?
   121  	BC	8,2,loop32a	// dec ctr; loop while ctr <> 0 && CR0 EQ
   122  	BNE	cmpne		// last pair of the chunk differed
   123  	ANDCC	$24,R8,R9	// Any 8 byte chunks?
   124  	BEQ	leftover	// and result is 0
   125  setup8a:
   126  	SRADCC	$3,R9,R9	// get the 8 byte count
   127  	BEQ	leftover	// shifted value is 0
   128  	MOVD	R9,CTR		// loop count for doublewords
   129  loop8:
   130  	MOVDBR	(R5+R0),R9	// doublewords to compare
   131  	MOVDBR	(R6+R0),R10	// byte-reversed loads give LE compare order
   132  	ADD	$8,R5
   133  	ADD	$8,R6
   134  	CMPU	R9,R10		// match?
   135  	BC	8,2,loop8	// dec ctr; loop while ctr <> 0 && CR0 EQ
   136  	BGT	greater
   137  	BLT	less
   138  leftover:
   139  	ANDCC	$7,R8,R9	// check for leftover bytes
   140  	MOVD	R9,CTR		// CTR = leftover byte count for the byte loop
   141  	BNE	simple		// leftover bytes
   142  	BC	12,10,equal	// test CR2 for length comparison
   143  	BC	12,8,less	// 1st len < 2nd len, result less
   144  	BR	greater		// 1st len > 2nd len must be greater
   145  simplecheck:
   146  	CMP	R8,$0		// remaining compare length 0
   147  	BNE	simple		// do simple compare
   148  	BC	12,10,equal	// test CR2 for length comparison
   149  	BC	12,8,less	// 1st len < 2nd len, result less
   150  	BR	greater		// 1st len > 2nd len must be greater
   151  simple:
   152  	MOVBZ	0(R5), R9	// get byte from 1st operand
   153  	ADD	$1,R5
   154  	MOVBZ	0(R6), R10	// get byte from 2nd operand
   155  	ADD	$1,R6
   156  	CMPU	R9, R10
   157  	BC	8,2,simple	// dec ctr; loop while ctr <> 0 && CR0 EQ
   158  	BGT	greater		// 1st > 2nd
   159  	BLT	less		// 1st < 2nd
   160  	BC	12,10,equal	// test CR2 for length comparison
   161  	BC	12,9,greater	// 1st len > 2nd len
   162  	BR	less		// must be less
   163  cmpne:				// only reached when a doubleword pair differs; R16 = its offset from R5/R6
   164  	MOVDBR	(R5+R16),R8	// reload in reverse order
   165  	MOVDBR	(R6+R16),R9
   166  	CMPU	R8,R9		// compare correct endianness
   167  	BGT	greater		// here only if NE, so not greater falls through to less
   168  less:
   169  	MOVD	$-1,R3
   170  	MOVD	R3,(R7)		// return value if A < B
   171  	RET
   172  equal:
   173  	MOVD	$0,(R7)		// return value if A == B
   174  	RET
   175  greater:
   176  	MOVD	$1,R3
   177  	MOVD	R3,(R7)		// return value if A > B
   178  	RET
   179  
   180  // Do an efficient memcmp for ppc64 (BE): compare the first min(a len, b len) bytes; on a tie the lengths decide.
   181  // R3 = a len
   182  // R4 = b len
   183  // R5 = a addr (advanced as data is consumed)
   184  // R6 = b addr (advanced as data is consumed)
   185  // R7 = addr of return value (-1, 0 or +1 is stored there). Also written: R3, R8-R10, R14, R15, CR0, CR2, CTR.
   186  TEXT cmpbodyBE<>(SB),NOSPLIT|NOFRAME,$0-0
   187  	MOVD	R3,R8		// set up length
   188  	CMP	R3,R4,CR2	// CR2 = a len vs b len, kept for the final tie-break
   189  	BC	12,8,setuplen	// BLT CR2
   190  	MOVD	R4,R8		// use R4 for comparison len
   191  setuplen:			// R8 = min(a len, b len)
   192  	MOVD	R8,CTR		// set up loop counter
   193  	CMP	R8,$8		// only optimize >=8
   194  	BLT	simplecheck
   195  	DCBT	(R5)		// cache hint
   196  	DCBT	(R6)
   197  	CMP	R8,$32		// optimize >= 32
   198  	MOVD	R8,R9		// R9 = length, turned into a doubleword count at setup8a
   199  	BLT	setup8a		// 8 byte moves only
   200  
   201  setup32a:
   202  	SRADCC	$5,R8,R9	// number of 32 byte chunks
   203  	MOVD	R9,CTR		// CTR = 32 byte chunk count
   204  loop32a:
   205  	MOVD	0(R5),R9	// doublewords to compare
   206  	MOVD	0(R6),R10	// get 4 doublewords
   207  	MOVD	8(R5),R14
   208  	MOVD	8(R6),R15
   209  	CMPU	R9,R10		// bytes equal?
   210  	BLT	less		// found to be less
   211  	BGT	greater		// found to be greater
   212  	MOVD	16(R5),R9	// get next pair of doublewords
   213  	MOVD	16(R6),R10
   214  	CMPU	R14,R15		// bytes match?
   215  	BLT	less		// found less
   216  	BGT	greater		// found greater
   217  	MOVD	24(R5),R14	// get next pair of doublewords
   218  	MOVD	24(R6),R15
   219  	CMPU	R9,R10		// bytes match?
   220  	BLT	less		// found to be less
   221  	BGT	greater		// found to be greater
   222  	ADD	$32,R5		// bump up to next 32
   223  	ADD	$32,R6
   224  	CMPU	R14,R15		// bytes match?
   225  	BC	8,2,loop32a	// dec ctr; loop while ctr <> 0 && CR0 EQ
   226  	BLT	less		// with BE, byte ordering is
   227  	BGT	greater		// good for compare
   228  	ANDCC	$24,R8,R9	// Any 8 byte chunks?
   229  	BEQ	leftover	// and result is 0
   230  setup8a:
   231  	SRADCC	$3,R9,R9	// get the 8 byte count
   232  	BEQ	leftover	// shifted value is 0
   233  	MOVD	R9,CTR		// loop count for doublewords
   234  loop8:
   235  	MOVD	(R5),R9		// doublewords to compare
   236  	MOVD	(R6),R10
   237  	ADD	$8,R5
   238  	ADD	$8,R6
   239  	CMPU	R9,R10		// match?
   240  	BC	8,2,loop8	// dec ctr; loop while ctr <> 0 && CR0 EQ
   241  	BGT	greater
   242  	BLT	less
   243  leftover:
   244  	ANDCC	$7,R8,R9	// check for leftover bytes
   245  	MOVD	R9,CTR		// CTR = leftover byte count for the byte loop
   246  	BNE	simple		// leftover bytes
   247  	BC	12,10,equal	// test CR2 for length comparison
   248  	BC	12,8,less	// 1st len < 2nd len, result less
   249  	BR	greater		// 1st len > 2nd len must be greater
   250  simplecheck:
   251  	CMP	R8,$0		// remaining compare length 0
   252  	BNE	simple		// do simple compare
   253  	BC	12,10,equal	// test CR2 for length comparison
   254  	BC 	12,8,less	// 1st len < 2nd len, result less
   255  	BR	greater		// 1st len > 2nd len must be greater
   256  simple:
   257  	MOVBZ	0(R5),R9	// get byte from 1st operand
   258  	ADD	$1,R5
   259  	MOVBZ	0(R6),R10	// get byte from 2nd operand
   260  	ADD	$1,R6
   261  	CMPU	R9,R10
   262  	BC	8,2,simple	// dec ctr; loop while ctr <> 0 && CR0 EQ
   263  	BGT	greater		// 1st > 2nd
   264  	BLT	less		// 1st < 2nd
   265  	BC	12,10,equal	// test CR2 for length comparison
   266  	BC	12,9,greater	// 1st len > 2nd len; otherwise fall through to less
   267  less:
   268  	MOVD	$-1,R3
   269  	MOVD    R3,(R7)		// return value if A < B
   270  	RET
   271  equal:
   272  	MOVD    $0,(R7)		// return value if A == B
   273  	RET
   274  greater:
   275  	MOVD	$1,R3
   276  	MOVD	R3,(R7)		// return value if A > B
   277  	RET