// SPDX-License-Identifier: Apache-2.0
// Copyright 2023 The Prime Citizens
//
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build pcz && arm

#include "textflag.h"

// Bytes compares the byte sequences (a_base, a_len) and (b_base, b_len)
// and has cmpbody store -1, 0 or +1 through R7. Only the base and length
// words are read; the words at offsets 8 and 20 are skipped (presumably the
// slice capacities - confirm against the Go declaration).
TEXT ·Bytes(SB),NOSPLIT|NOFRAME,$0-28
	ADD	$28, R13, R7	// R7 = R13+28, the address cmpbody writes the result to
	MOVW	a_base+0(FP), R2
	MOVW	b_base+12(FP), R3
	MOVW	a_len+4(FP), R0
	MOVW	b_len+16(FP), R1
	B	cmpbody<>(SB)	// plain branch: cmpbody's RET returns to our caller

// String is the same entry sequence for operands that consist of a base
// and a length word only; the result address is R13+20.
TEXT ·String(SB),NOSPLIT|NOFRAME,$0-20
	ADD	$20, R13, R7	// R7 = R13+20, the address cmpbody writes the result to
	MOVW	a_base+0(FP), R2
	MOVW	b_base+8(FP), R3
	MOVW	a_len+4(FP), R0
	MOVW	b_len+12(FP), R1
	B	cmpbody<>(SB)	// plain branch: cmpbody's RET returns to our caller

// cmpbody is the comparison routine shared by Bytes and String.
//
// Inputs:
//	R0 = length of a
//	R1 = length of b
//	R2 = address of the first byte of a
//	R3 = address of the first byte of b
//	R7 = address that receives the result (-1, 0 or 1)
//
// The result is decided by the first differing byte within the common
// prefix (bytes are loaded zero-extended); if the common prefix is
// identical, or both operands start at the same address, it is decided by
// the lengths. 1 means a is greater, -1 means b is greater, 0 means equal.
//
// Not preserved: R0 (ends up holding the stored result), R2 and R3
// (advanced while scanning), R4, R5, R6 and R8 (scratch).
TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0
	CMP	R2, R3
	BEQ	same_prefix	// same start address: only the lengths can differ
	MOVW	R0, R6
	CMP	R0, R1
	MOVW.LT	R1, R6		// R6 = min(R0, R1)

	CMP	$0, R6
	BEQ	same_prefix	// nothing to scan, go compare the lengths
	CMP	$4, R6		// flags consumed by the BLT below
	ADD	R2, R6, R6	// R6 = end of the range in a; ADD leaves the flags alone
	BLT	byte_loop	// fewer than 4 bytes to compare
	AND	$3, R2, R8
	CMP	$0, R8
	BNE	byte_loop	// a is not word aligned (TODO: try to align a)
	AND	$3, R3, R8
	CMP	$0, R8
	BNE	byte_loop	// b is not word aligned
	AND	$0xfffffffc, R6, R8	// R8 = end of range rounded down to a multiple of 4
	// Both pointers are aligned and at least 4 bytes remain.
word_loop:
	MOVW.P	4(R2), R4	// load one word from each operand, post-incrementing
	MOVW.P	4(R3), R5
	CMP	R4, R5
	BNE	word_mismatch
	CMP	R2, R8
	BNE	word_loop
	CMP	R2, R6
	BEQ	same_prefix	// no tail bytes left; compare the lengths
byte_loop:
	MOVBU.P	1(R2), R4	// load one byte from each operand, post-incrementing
	MOVBU.P	1(R3), R5
	CMP	R4, R5
	BNE	bytes_differ
	CMP	R2, R6
	BNE	byte_loop
	// Fall through: every byte of the common prefix matched.
same_prefix:
	CMP	R0, R1
	MOVW.EQ	$0, R0		// equal lengths
	MOVW.GT	$-1, R0		// b is longer
	MOVW.LT	$1, R0		// a is longer
	MOVW	R0, (R7)
	RET
word_mismatch:
	// The last pair of words differs. Step both pointers back over it and
	// let byte_loop find the first differing byte.
	SUB	$4, R2
	SUB	$4, R3
	B	byte_loop
bytes_differ:
	// Flags still come from the CMP R4, R5 in byte_loop.
	MOVW.GT	$-1, R0		// byte of b is greater
	MOVW.LT	$1, R0		// byte of a is greater
	MOVW	R0, (R7)
	RET