github.com/x04/go/src@v0.0.0-20200202162449-3d481ceb3525/internal/bytealg/compare_arm64.s (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
     8  TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56
     9  	MOVD	a_len+8(FP), R0           // R0 = length of a
    10  	MOVD	b_len+32(FP), R1          // R1 = length of b
    11  	MOVD	a_base+0(FP), R2          // R2 = start of a
    12  	MOVD	b_base+24(FP), R3         // R3 = start of b (24-byte stride: presumably slice headers, see the Go declaration)
    13  	MOVD	$ret+48(FP), R7           // R7 = address of the result slot, which cmpbody fills in
    14  	B	cmpbody<>(SB)             // branch (not call): cmpbody stores the result and returns to our caller
    15  
    16  TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
    17  	MOVD	a_len+8(FP), R0           // R0 = length of a
    18  	MOVD	b_len+24(FP), R1          // R1 = length of b
    19  	MOVD	a_base+0(FP), R2          // R2 = start of a
    20  	MOVD	b_base+16(FP), R3         // R3 = start of b (16-byte stride: presumably string headers, see the Go declaration)
    21  	MOVD	$ret+32(FP), R7           // R7 = address of the result slot, which cmpbody fills in
    22  	B	cmpbody<>(SB)             // branch (not call): cmpbody stores the result and returns to our caller
    23  
    24  // cmpbody compares the byte sequences a and b and stores -1, 0 or +1 through R7. On entry:
    25  // R0 is the length of a
    26  // R1 is the length of b
    27  // R2 points to the start of a
    28  // R3 points to the start of b
    29  // R7 points to return value (-1/0/1 will be written here)
    30  // Bytes are ordered as unsigned values; when the common prefix is equal, the lengths decide.
    31  // On exit:
    32  // R4, R5, R6, R8, R9 and R10 are clobbered; R2 and R3 are also advanced on most paths
    33  TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0
    34  	CMP	R2, R3
    35  	BEQ	samebytes         // same starting pointers; compare lengths
    36  	CMP	R0, R1
    37  	CSEL	LT, R1, R0, R6    // R6 is min(R0, R1)
    38  
    39  	CMP	$0, R6
    40  	BEQ	samebytes         // shorter input is empty; only the lengths matter
    41  	BIC	$0xf, R6, R10     // R10 = R6 rounded down to a multiple of 16
    42  	CBZ	R10, small        // length < 16
    43  	ADD	R2, R10           // R10 = address in a where the whole 16-byte chunks end
    44  	// length >= 16
    45  chunk16_loop:
    46  	LDP.P	16(R2), (R4, R8)  // load 16 bytes of a into R4/R8, then advance R2 by 16
    47  	LDP.P	16(R3), (R5, R9)  // load 16 bytes of b into R5/R9, then advance R3 by 16
    48  	CMP	R4, R5
    49  	BNE	cmp               // first 8 bytes of the chunk differ
    50  	CMP	R8, R9
    51  	BNE	cmpnext           // second 8 bytes of the chunk differ
    52  	CMP	R10, R2
    53  	BNE	chunk16_loop      // keep going until R2 reaches the end of the 16-byte chunks
    54  	AND	$0xf, R6, R6      // R6 = leftover byte count (0..15)
    55  	CBZ	R6, samebytes
    56  	SUBS	$8, R6            // R6 = leftover - 8
    57  	BLT	tail              // fewer than 8 left: negative R6 makes tail overlap bytes already compared
    58  	// at least 8 bytes are left in the tail
    59  	MOVD.P	8(R2), R4
    60  	MOVD.P	8(R3), R5
    61  	CMP	R4, R5
    62  	BNE	cmp
    63  	SUB	$8, R6            // R6 = leftover - 16, a negative offset from the advanced R2/R3
    64  	// compare the last 8 bytes of the common prefix; R6 is a negative offset from R2/R3
    65  tail:
    66  	MOVD	(R2)(R6), R4
    67  	MOVD	(R3)(R6), R5
    68  	CMP	R4, R5
    69  	BEQ	samebytes
    70  cmp:
    71  	REV	R4, R4            // reverse bytes so the lowest-addressed byte is most significant (little-endian loads)
    72  	REV	R5, R5
    73  	CMP	R4, R5
    74  ret:
    75  	MOVD	$1, R4            // assume a > b; flags from the preceding compare are still live (MOVD leaves them alone)
    76  	CNEG	HI, R4, R4        // flip to -1 when b's value is higher (unsigned), i.e. a < b
    77  	MOVD	R4, (R7)
    78  	RET
    79  small:
    80  	TBZ	$3, R6, lt_8      // bit 3 of the length clear: fewer than 8 bytes
    81  	MOVD	(R2), R4
    82  	MOVD	(R3), R5
    83  	CMP	R4, R5
    84  	BNE	cmp
    85  	SUBS	$8, R6
    86  	BEQ	samebytes         // exactly 8 bytes and they were all equal
    87  	ADD	$8, R2
    88  	ADD	$8, R3
    89  	SUB	$8, R6            // R6 = length - 16: negative offset so tail loads the final 8 bytes
    90  	B	tail
    91  lt_8:
    92  	TBZ	$2, R6, lt_4      // bit 2 clear: no 4-byte piece
    93  	MOVWU	(R2), R4
    94  	MOVWU	(R3), R5
    95  	CMPW	R4, R5
    96  	BNE	cmp
    97  	SUBS	$4, R6
    98  	BEQ	samebytes         // exactly 4 bytes and they were all equal
    99  	ADD	$4, R2
   100  	ADD	$4, R3
   101  lt_4:
   102  	TBZ	$1, R6, lt_2      // bit 1 clear: no 2-byte piece
   103  	MOVHU	(R2), R4
   104  	MOVHU	(R3), R5
   105  	CMPW	R4, R5
   106  	BNE	cmp
   107  	ADD	$2, R2
   108  	ADD	$2, R3
   109  lt_2:
   110  	TBZ	$0, R6, samebytes // bit 0 clear: no final single byte
   111  one:
   112  	MOVBU	(R2), R4
   113  	MOVBU	(R3), R5
   114  	CMPW	R4, R5
   115  	BNE	ret               // single bytes need no REV; ret reuses the flags from CMPW
   116  samebytes:
   117  	CMP	R1, R0            // common prefix equal (or same pointer): order by length
   118  	CSET	NE, R4            // R4 = 1 if the lengths differ, else 0
   119  	CNEG	LO, R4, R4        // flip to -1 when a's length is lower (unsigned) than b's
   120  	MOVD	R4, (R7)
   121  	RET
   122  cmpnext:
   123  	REV	R8, R4            // the difference is in the second word of the chunk (R8/R9)
   124  	REV	R9, R5
   125  	CMP	R4, R5
   126  	B	ret