github.com/primecitizens/pcz/std@v0.2.1/core/cmp/bs_arm64.s (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright 2023 The Prime Citizens
     3  // 
     4  // Copyright 2018 The Go Authors. All rights reserved.
     5  // Use of this source code is governed by a BSD-style
     6  // license that can be found in the LICENSE file.
     7  
     8  //go:build pcz && arm64
     9  
    10  #include "textflag.h"
    11  
    12  TEXT ·Bytes<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56 // three-way compare of byte slices a and b; the work is done by cmpbody
    13  	// R0 = a_base (want in R0)
    14  	// R1 = a_len  (want in R1)
    15  	// R2 = a_cap  (unused)
    16  	// R3 = b_base (want in R2)
    17  	// R4 = b_len  (want in R3)
    18  	// R5 = b_cap  (unused)
    19  	MOVD R3, R2 // R2 = b_base, overwriting the unused a_cap; must run before R3 is overwritten below
    20  	MOVD R4, R3 // R3 = b_len; registers now match cmpbody's entry contract
    21  	B cmpbody<>(SB) // branch, not a call: cmpbody's RET returns straight to our caller with the result in R0
    22  
    23  TEXT ·String<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40 // three-way compare of strings a and b; the work is done by cmpbody
    24  	// R0 = a_base
    25  	// R1 = a_len
    26  	// R2 = b_base
    27  	// R3 = b_len
    28  	B cmpbody<>(SB) // arguments already sit where cmpbody wants them; branch (not call) so its RET returns to our caller
    29  
    30  // cmpbody: three-way lexicographic compare of a and b. On entry:
    31  // R0 points to the start of a
    32  // R1 is the length of a
    33  // R2 points to the start of b
    34  // R3 is the length of b
    35  // Only the first min(R1, R3) bytes are read; if they all match, the lengths decide.
    36  // On exit:
    37  // R0 is the result: -1 if a < b, 0 if a == b, +1 if a > b
    38  // R2, R4, R5, R6, R8, R9 and R10 are clobbered (R2 is advanced in step with R0)
    39  TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0
    40  	CMP R0, R2
    41  	BEQ samebytes         // same starting pointers; compare lengths
    42  	CMP R1, R3
    43  	CSEL LT, R3, R1, R6    // R6 is min(R1, R3)
    44  
    45  	CBZ R6, samebytes     // nothing to compare byte-wise; lengths decide
    46  	BIC $0xf, R6, R10     // R10 = R6 rounded down to a multiple of 16
    47  	CBZ R10, small        // length < 16
    48  	ADD R0, R10           // end of chunk16
    49  	// length >= 16
    50  chunk16_loop:
    51  	LDP.P 16(R0), (R4, R8) // next 16 bytes of a; R0 is post-incremented by 16
    52  	LDP.P 16(R2), (R5, R9) // next 16 bytes of b; R2 is post-incremented by 16
    53  	CMP R4, R5
    54  	BNE cmp               // mismatch within the first 8 bytes of the chunk
    55  	CMP R8, R9
    56  	BNE cmpnext           // mismatch within the second 8 bytes of the chunk
    57  	CMP R10, R0
    58  	BNE chunk16_loop      // keep going until R0 reaches the end of the 16-byte chunks
    59  	AND $0xf, R6, R6      // R6 = bytes left over after the 16-byte chunks (0..15)
    60  	CBZ R6, samebytes
    61  	SUBS $8, R6           // R6 = left - 8; negative when fewer than 8 bytes are left
    62  	BLT tail              // tail's load then overlaps bytes already found equal
    63  	// the length of tail >= 8 bytes
    64  	MOVD.P 8(R0), R4
    65  	MOVD.P 8(R2), R5
    66  	CMP R4, R5
    67  	BNE cmp
    68  	SUB $8, R6            // R6 = left - 16 (negative): offset of the final 8 bytes from the advanced pointers
    69  	// compare last 8 bytes
    70  tail:
    71  	MOVD (R0)(R6), R4     // R6 is negative on every path here: load the 8 bytes ending at the common length
    72  	MOVD (R2)(R6), R5
    73  	CMP R4, R5
    74  	BEQ samebytes
    75  cmp:
    76  	REV R4, R4            // reverse bytes so the lowest-address byte becomes most significant
    77  	REV R5, R5            // (relies on the loads above being little-endian)
    78  	CMP R4, R5            // unsigned order of the reversed words matches lexicographic byte order
    79  ret:
    80  	MOVD $1, R0           // flags from the preceding compare are still live for CNEG
    81  	CNEG HI, R0, R0       // R0 = -1 when b's value is higher (a < b), otherwise +1
    82  	RET
    83  small:
    84  	TBZ $3, R6, lt_8      // here 0 < R6 < 16; bit 3 clear means fewer than 8 bytes
    85  	MOVD (R0), R4
    86  	MOVD (R2), R5
    87  	CMP R4, R5
    88  	BNE cmp
    89  	SUBS $8, R6
    90  	BEQ samebytes         // exactly 8 bytes and all equal
    91  	ADD $8, R0
    92  	ADD $8, R2
    93  	SUB $8, R6            // R6 = length - 16 (negative), as tail expects after the pointers moved by 8
    94  	B tail
    95  lt_8:
    96  	TBZ $2, R6, lt_4      // bit 2 clear means fewer than 4 bytes
    97  	MOVWU (R0), R4        // zero-extended, so the 64-bit REV/CMP at cmp still orders correctly
    98  	MOVWU (R2), R5
    99  	CMPW R4, R5
   100  	BNE cmp
   101  	SUBS $4, R6
   102  	BEQ samebytes         // exactly 4 bytes and all equal
   103  	ADD $4, R0
   104  	ADD $4, R2
   105  lt_4:
   106  	TBZ $1, R6, lt_2      // bit 1 set means a 2-byte piece remains
   107  	MOVHU (R0), R4
   108  	MOVHU (R2), R5
   109  	CMPW R4, R5
   110  	BNE cmp
   111  	ADD $2, R0
   112  	ADD $2, R2
   113  lt_2:
   114  	TBZ $0, R6, samebytes // bit 0 clear: no final odd byte, all compared bytes matched
   115  one:
   116  	MOVBU (R0), R4
   117  	MOVBU (R2), R5
   118  	CMPW R4, R5
   119  	BNE ret               // single bytes need no REV; go straight to the flag-to-result step
   120  samebytes:
   121  	CMP R3, R1            // all compared bytes equal: order by length
   122  	CSET NE, R0           // R0 = 1 if the lengths differ, 0 if they are equal
   123  	CNEG LO, R0, R0       // R0 = -1 when a is shorter than b
   124  	RET
   125  cmpnext:
   126  	REV R8, R4            // same as cmp, but for the second 8 bytes of the 16-byte chunk
   127  	REV R9, R5
   128  	CMP R4, R5
   129  	B ret