// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Origin: github.com/icodeface/tls@v0.0.0-20230910023335-34df9250cd12/internal/bytealg/compare_arm64.s

#include "go_asm.h"
#include "textflag.h"

// Note on operand order (Go arm64 assembler): "CMP Rm, Rn" sets the flags
// for Rn - Rm, "CSEL cond, Ra, Rb, Rd" gives Rd = cond ? Ra : Rb, and
// "CNEG cond, Rn, Rd" gives Rd = cond ? -Rn : Rn.

// ·Compare loads the base pointer and length of a and b from the argument
// frame, takes the address of the result slot, and tail-branches to
// cmpbody<>, which stores -1, 0 or +1 through that address and returns
// directly to the caller of ·Compare.
//
// The offsets used (a at 0/8, b at 24/32, ret at 48) are consistent with two
// 24-byte slice headers followed by an int result; the Go prototype itself
// lives outside this file.
TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56
	MOVD	a_base+0(FP), R2
	MOVD	a_len+8(FP), R0
	MOVD	b_base+24(FP), R3
	MOVD	b_len+32(FP), R1
	MOVD	$ret+48(FP), R7   // R7 = address of the result slot, not its value
	B	cmpbody<>(SB)

// runtime·cmpstring is the same stub for arguments laid out as two 16-byte
// (pointer, length) pairs: a at 0/8, b at 16/24, result at 32.
TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
	MOVD	a_base+0(FP), R2
	MOVD	a_len+8(FP), R0
	MOVD	b_base+16(FP), R3
	MOVD	b_len+24(FP), R1
	MOVD	$ret+32(FP), R7   // R7 = address of the result slot, not its value
	B	cmpbody<>(SB)

// cmpbody<> compares the first min(len(a), len(b)) bytes of a and b in
// memory order; if they are all equal the result is decided by the lengths.
//
// On entry:
// R0 is the length of a
// R1 is the length of b
// R2 points to the start of a
// R3 points to the start of b
// R7 points to return value (-1/0/1 will be written here)
//
// On exit:
// R4, R5, R6, R8, R9 and R10 are clobbered
// R2 and R3 have been advanced past the bytes already compared
//
// Strategy: compare 16 bytes per iteration while at least 16 remain, then
// finish with one (possibly overlapping) 8-byte load that ends exactly at
// the end of the common prefix. Inputs shorter than 16 bytes are handled by
// an 8/4/2/1-byte ladder driven by the bits of the length. When a pair of
// loaded words differs, both are byte-reversed so that an unsigned compare
// orders them by their first differing byte in memory.
TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0
	CMP	R2, R3
	BEQ	samebytes         // same starting pointers; compare lengths
	CMP	R0, R1
	CSEL	LT, R1, R0, R6    // R6 is min(R0, R1)

	CMP	$0, R6
	BEQ	samebytes         // one side is empty; lengths decide
	BIC	$0xf, R6, R10     // R10 = R6 rounded down to a multiple of 16
	CBZ	R10, small        // length < 16
	ADD	R2, R10           // end of chunk16
	// length >= 16
chunk16_loop:
	LDP.P	16(R2), (R4, R8)  // R4,R8 = next 16 bytes of a; R2 += 16
	LDP.P	16(R3), (R5, R9)  // R5,R9 = next 16 bytes of b; R3 += 16
	CMP	R4, R5
	BNE	cmp               // first 8 bytes differ
	CMP	R8, R9
	BNE	cmpnext           // second 8 bytes differ
	CMP	R10, R2
	BNE	chunk16_loop      // loop until R2 reaches the end of chunk16
	AND	$0xf, R6, R6      // R6 = bytes left over after the 16-byte chunks
	CBZ	R6, samebytes
	SUBS	$8, R6
	BLT	tail              // 1..7 bytes left: R6 is now negative (left - 8)
	// the length of tail is 8..15 bytes: compare 8 of them here
	MOVD.P	8(R2), R4
	MOVD.P	8(R3), R5
	CMP	R4, R5
	BNE	cmp
	SUB	$8, R6            // R6 = left - 16, the (negative) offset used by tail
	// compare last 8 bytes
	// R6 <= 0 here, so the load ends exactly at the end of the common prefix
	// and may re-read bytes that were already found equal.
tail:
	MOVD	(R2)(R6), R4
	MOVD	(R3)(R6), R5
	CMP	R4, R5
	BEQ	samebytes
	// R4 (from a) and R5 (from b) differ: reverse the bytes so that the
	// unsigned compare below is decided by the first differing byte in memory.
cmp:
	REV	R4, R4
	REV	R5, R5
	CMP	R4, R5
	// Flags hold (b value) - (a value) and the values are known to differ:
	// HI means a < b, so +1 is negated to -1; otherwise the result is +1.
ret:
	MOVD	$1, R4
	CNEG	HI, R4, R4
	MOVD	R4, (R7)
	RET
	// 1 <= R6 < 16: each bit of R6 selects one step of the ladder.
small:
	TBZ	$3, R6, lt_8      // bit 3 clear: fewer than 8 bytes
	MOVD	(R2), R4
	MOVD	(R3), R5
	CMP	R4, R5
	BNE	cmp
	SUBS	$8, R6
	BEQ	samebytes         // exactly 8 bytes and all equal
	ADD	$8, R2
	ADD	$8, R3
	SUB	$8, R6            // R6 = length - 16 (negative), as tail expects
	B	tail
lt_8:
	TBZ	$2, R6, lt_4      // bit 2 clear: fewer than 4 bytes
	MOVWU	(R2), R4
	MOVWU	(R3), R5
	CMPW	R4, R5
	BNE	cmp               // zero-extended words are fine for the 64-bit REV in cmp
	SUBS	$4, R6
	BEQ	samebytes         // exactly 4 bytes and all equal
	ADD	$4, R2
	ADD	$4, R3
lt_4:
	TBZ	$1, R6, lt_2      // bit 1 clear: at most 1 byte
	MOVHU	(R2), R4
	MOVHU	(R3), R5
	CMPW	R4, R5
	BNE	cmp
	ADD	$2, R2
	ADD	$2, R3
lt_2:
	TBZ	$0, R6, samebytes // bit 0 clear: nothing left to compare
one:
	MOVBU	(R2), R4
	MOVBU	(R3), R5
	CMPW	R4, R5
	BNE	ret               // single bytes need no REV; flags are already set for ret
	// The common prefix is identical: the result depends only on the lengths.
samebytes:
	CMP	R1, R0            // flags for len(a) - len(b)
	CSET	NE, R4            // R4 = 0 if the lengths are equal, else 1
	CNEG	LO, R4, R4        // len(a) < len(b): 1 becomes -1
	MOVD	R4, (R7)
	RET
	// The second halves of a 16-byte chunk (R8 from a, R9 from b) differ.
cmpnext:
	REV	R8, R4
	REV	R9, R5
	CMP	R4, R5
	B	ret