github.com/sandwichdev/go-internals@v0.0.0-20210605002614-12311ac6b2c5/bytealg/compare_arm64.s (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
     8  TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56    // entry stub: load a and b into the registers cmpbody<> expects, then branch to it
     9  	MOVD	a_len+8(FP), R0    // R0 = length of a
    10  	MOVD	b_len+32(FP), R1    // R1 = length of b (b's argument words begin at offset 24)
    11  	MOVD	a_base+0(FP), R2    // R2 = address of a's first byte
    12  	MOVD	b_base+24(FP), R3    // R3 = address of b's first byte
    13  	MOVD	$ret+48(FP), R7    // R7 = address of the result slot; cmpbody<> stores -1/0/1 through it
    14  	B	cmpbody<>(SB)    // plain branch, not a call: the RET inside cmpbody<> returns to our caller
    15  
    16  TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40    // entry stub: load a and b into the registers cmpbody<> expects, then branch to it
    17  	MOVD	a_len+8(FP), R0    // R0 = length of a
    18  	MOVD	b_len+24(FP), R1    // R1 = length of b (b's argument words begin at offset 16)
    19  	MOVD	a_base+0(FP), R2    // R2 = address of a's first byte
    20  	MOVD	b_base+16(FP), R3    // R3 = address of b's first byte
    21  	MOVD	$ret+32(FP), R7    // R7 = address of the result slot; cmpbody<> stores -1/0/1 through it
    22  	B	cmpbody<>(SB)    // plain branch, not a call: the RET inside cmpbody<> returns to our caller
    23  
    24  // cmpbody<> compares a and b byte by byte; both entry points above branch here. On entry:
    25  // R0 is the length of a
    26  // R1 is the length of b
    27  // R2 points to the start of a
    28  // R3 points to the start of b
    29  // R7 points to return value (-1/0/1 will be written here)
    30  // Result: -1 if a orders before b, +1 if after, 0 if equal; when one is a prefix of the other, the shorter orders first.
    31  // On exit:
    32  // R4, R5, R6, R8, R9 and R10 are clobbered; R2, R3 and the condition flags are modified as well
    33  TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0
    34  	CMP	R2, R3
    35  	BEQ	samebytes         // same starting pointers; compare lengths
    36  	CMP	R0, R1    // flags from R1 - R0 (Go operand order)
    37  	CSEL	LT, R1, R0, R6    // R6 is min(R0, R1)
    38  
    39  	CBZ	R6, samebytes    // at least one side is empty: only the lengths matter
    40  	BIC	$0xf, R6, R10    // R10 = min length rounded down to a multiple of 16
    41  	CBZ	R10, small        // length < 16
    42  	ADD	R2, R10           // end of chunk16: address in a where the whole 16-byte chunks stop
    43  	// length >= 16
    44  chunk16_loop:
    45  	LDP.P	16(R2), (R4, R8)    // R4, R8 = next 16 bytes of a; R2 += 16 (post-index)
    46  	LDP.P	16(R3), (R5, R9)    // R5, R9 = next 16 bytes of b; R3 += 16 (post-index)
    47  	CMP	R4, R5
    48  	BNE	cmp    // first 8 bytes of the chunk differ
    49  	CMP	R8, R9
    50  	BNE	cmpnext    // second 8 bytes of the chunk differ
    51  	CMP	R10, R2
    52  	BNE	chunk16_loop    // R2 has not reached the end of the whole chunks yet
    53  	AND	$0xf, R6, R6    // R6 = leftover byte count, 0..15
    54  	CBZ	R6, samebytes    // nothing left over: the common prefix is equal
    55  	SUBS	$8, R6    // R6 = leftover - 8, flags set for the BLT below
    56  	BLT	tail    // fewer than 8 left: the negative R6 makes tail load the final 8 bytes
    57  	// the length of tail >= 8 bytes
    58  	MOVD.P	8(R2), R4    // compare 8 more bytes; R2 += 8
    59  	MOVD.P	8(R3), R5    // R3 += 8
    60  	CMP	R4, R5
    61  	BNE	cmp
    62  	SUB	$8, R6    // R6 = leftover - 16 (negative): offset of the final 8 bytes from the advanced pointers
    63  	// compare last 8 bytes (the load may overlap bytes already found equal)
    64  tail:
    65  	MOVD	(R2)(R6), R4    // load from R2 + R6
    66  	MOVD	(R3)(R6), R5    // load from R3 + R6
    67  	CMP	R4, R5
    68  	BEQ	samebytes
    69  cmp:
    70  	REV	R4, R4    // byte-reverse both words so the lowest-addressed byte becomes most significant
    71  	REV	R5, R5
    72  	CMP	R4, R5    // unsigned order of the reversed words is the bytewise order
    73  ret:
    74  	MOVD	$1, R4    // start from +1; MOVD leaves the flags of the preceding compare intact
    75  	CNEG	HI, R4, R4    // HI after CMP R4, R5 means b's value is larger: negate to -1
    76  	MOVD	R4, (R7)    // store the result through R7
    77  	RET
    78  small:
    79  	TBZ	$3, R6, lt_8    // bit 3 of the length clear: fewer than 8 bytes to compare
    80  	MOVD	(R2), R4    // length is 8..15: compare the first 8 bytes
    81  	MOVD	(R3), R5
    82  	CMP	R4, R5
    83  	BNE	cmp
    84  	SUBS	$8, R6
    85  	BEQ	samebytes    // exactly 8 bytes and they matched
    86  	ADD	$8, R2
    87  	ADD	$8, R3
    88  	SUB	$8, R6    // R6 = length - 16, the negative offset that tail expects
    89  	B	tail
    90  lt_8:
    91  	TBZ	$2, R6, lt_4    // bit 2 clear: no 4-byte step needed
    92  	MOVWU	(R2), R4    // 4 bytes, zero-extended
    93  	MOVWU	(R3), R5
    94  	CMPW	R4, R5
    95  	BNE	cmp
    96  	SUBS	$4, R6
    97  	BEQ	samebytes    // exactly 4 bytes and they matched
    98  	ADD	$4, R2
    99  	ADD	$4, R3
   100  lt_4:
   101  	TBZ	$1, R6, lt_2    // bit 1 clear: no 2-byte step needed
   102  	MOVHU	(R2), R4    // 2 bytes, zero-extended
   103  	MOVHU	(R3), R5
   104  	CMPW	R4, R5
   105  	BNE	cmp
   106  	ADD	$2, R2
   107  	ADD	$2, R3
   108  lt_2:
   109  	TBZ	$0, R6, samebytes    // bit 0 clear: no byte left to compare
   110  one:
   111  	MOVBU	(R2), R4    // final single byte, zero-extended
   112  	MOVBU	(R3), R5
   113  	CMPW	R4, R5
   114  	BNE	ret    // single bytes need no REV; the flags are already set the way ret expects
   115  samebytes:
   116  	CMP	R1, R0    // contents tie over the common length: order by length (flags from R0 - R1)
   117  	CSET	NE, R4    // R4 = 1 if the lengths differ, else 0
   118  	CNEG	LO, R4, R4    // a is shorter than b: negate to -1
   119  	MOVD	R4, (R7)    // store the result through R7
   120  	RET
   121  cmpnext:
   122  	REV	R8, R4    // the second words of the chunk differed: byte-reverse them into R4/R5
   123  	REV	R9, R5
   124  	CMP	R4, R5
   125  	B	ret