// SPDX-License-Identifier: Apache-2.0
// Copyright 2023 The Prime Citizens
//
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build pcz && arm64

#include "textflag.h"

// Bytes three-way compares two byte slices a and b.
//
// The result is left in R0: -1 if a sorts before b, 0 if they are equal,
// +1 if a sorts after b (see cmpbody below).
//
// NOTE(review): the Go-side declaration is not in this file; the register
// layout below implies two slice arguments and one integer result - confirm
// against the stub.
TEXT ·Bytes<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
	// R0 = a_base (want in R0)
	// R1 = a_len  (want in R1)
	// R2 = a_cap  (unused)
	// R3 = b_base (want in R2)
	// R4 = b_len  (want in R3)
	// R5 = b_cap  (unused)
	//
	// Order matters: R3 must be copied out before it is overwritten by R4.
	MOVD	R3, R2
	MOVD	R4, R3
	B	cmpbody<>(SB)

// String three-way compares two strings a and b.
//
// The arguments already sit in the registers cmpbody expects, so this is a
// plain tail branch. The result is left in R0 (-1, 0 or +1).
TEXT ·String<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
	// R0 = a_base
	// R1 = a_len
	// R2 = b_base
	// R3 = b_len
	B	cmpbody<>(SB)

// cmpbody is the shared comparison routine.
//
// On entry:
// R0 points to the start of a
// R1 is the length of a
// R2 points to the start of b
// R3 is the length of b
//
// On exit:
// R0 is the result: -1, 0 or +1
// R4, R5, R6, R8, R9 and R10 are clobbered
// (R2 is also advanced on the paths that step through b)
//
// Strategy: compare the first min(a_len, b_len) bytes; if they are all
// equal, the lengths decide the result (samebytes).
//
// Operand order reminder: in this assembler "CMP x, y" sets the flags
// from y - x, so a following condition describes y relative to x.
TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0
	CMP	R0, R2
	BEQ	samebytes         // same starting pointers; compare lengths
	CMP	R1, R3
	CSEL	LT, R3, R1, R6    // R6 is min(R1, R3)

	CBZ	R6, samebytes     // nothing in common to compare; lengths decide
	BIC	$0xf, R6, R10     // R10 = R6 rounded down to a multiple of 16
	CBZ	R10, small        // length < 16
	ADD	R0, R10           // end of chunk16
	// length >= 16
chunk16_loop:
	// Load 16 bytes from each side (post-increment) and compare them as
	// two 8-byte words.
	LDP.P	16(R0), (R4, R8)
	LDP.P	16(R2), (R5, R9)
	CMP	R4, R5
	BNE	cmp               // first word differs
	CMP	R8, R9
	BNE	cmpnext           // second word differs
	CMP	R10, R0
	BNE	chunk16_loop
	AND	$0xf, R6, R6      // R6 = number of bytes left (0..15)
	CBZ	R6, samebytes
	SUBS	$8, R6
	BLT	tail              // fewer than 8 left: R6 is now negative
	// at least 8 tail bytes remain: compare the next 8 first
	MOVD.P	8(R0), R4
	MOVD.P	8(R2), R5
	CMP	R4, R5
	BNE	cmp
	SUB	$8, R6
	// compare last 8 bytes
tail:
	// R6 is negative here, so R0+R6 / R2+R6 address the 8 bytes that end
	// exactly at the end of the common prefix; this may re-read bytes
	// that were already compared equal.
	MOVD	(R0)(R6), R4
	MOVD	(R2)(R6), R5
	CMP	R4, R5
	BEQ	samebytes
cmp:
	// Byte-reverse both words so the byte that comes first in memory
	// becomes the most significant one; an unsigned integer compare then
	// matches byte-wise order (relies on little-endian loads).
	REV	R4, R4
	REV	R5, R5
	CMP	R4, R5
ret:
	// Flags come from the last CMP/CMPW of (a value, b value).
	// HI means b's value is larger, so a sorts first: result -1, else +1.
	MOVD	$1, R0
	CNEG	HI, R0, R0
	RET
small:
	// 1 <= length < 16: peel off 8, 4, 2, 1 bytes according to the bits
	// of R6.
	TBZ	$3, R6, lt_8
	MOVD	(R0), R4
	MOVD	(R2), R5
	CMP	R4, R5
	BNE	cmp
	SUBS	$8, R6
	BEQ	samebytes         // exactly 8 bytes and they matched
	ADD	$8, R0
	ADD	$8, R2
	SUB	$8, R6            // make R6 negative for the overlapping tail load
	B	tail
lt_8:
	TBZ	$2, R6, lt_4
	MOVWU	(R0), R4
	MOVWU	(R2), R5
	CMPW	R4, R5
	BNE	cmp
	SUBS	$4, R6
	BEQ	samebytes
	ADD	$4, R0
	ADD	$4, R2
lt_4:
	TBZ	$1, R6, lt_2
	MOVHU	(R0), R4
	MOVHU	(R2), R5
	CMPW	R4, R5
	BNE	cmp
	// R6 is not decremented here; only its bit 0 is examined below.
	ADD	$2, R0
	ADD	$2, R2
lt_2:
	TBZ	$0, R6, samebytes
one:
	// Single bytes need no byte reversal; go straight to ret with the
	// flags from this compare.
	MOVBU	(R0), R4
	MOVBU	(R2), R5
	CMPW	R4, R5
	BNE	ret
samebytes:
	// The common prefix is identical: the lengths decide.
	// R0 = 0 if a_len == b_len, otherwise 1, negated when a is shorter.
	CMP	R3, R1
	CSET	NE, R0
	CNEG	LO, R0, R0
	RET
cmpnext:
	// The difference is in the second word of the 16-byte chunk;
	// byte-reverse it into R4/R5 and share the result code at ret.
	REV	R8, R4
	REV	R9, R5
	CMP	R4, R5
	B	ret