github.com/JimmyHuang454/JLS-go@v0.0.0-20230831150107-90d536585ba0/internal/bytealg/compare_riscv64.s (about)

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
     8  TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56 // entry taking two (base, len, cap) triples in X10-X15
     9  	// X10 = a_base (already where compare<> reads it)
    10  	// X11 = a_len (already where compare<> reads it)
    11  	// X12 = a_cap (unused, overwritten below)
    12  	// X13 = b_base (want in X12)
    13  	// X14 = b_len (want in X13)
    14  	// X15 = b_cap (unused)
    15  	MOV	X13, X12 // b_base -> X12; must run before X13 is overwritten
    16  	MOV	X14, X13 // b_len -> X13
    17  	JMP	compare<>(SB) // tail jump: compare<> sets X10 and executes the RET
    18  
    19  TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40 // entry taking two (base, len) pairs in X10-X13
    20  	// X10 = a_base
    21  	// X11 = a_len
    22  	// X12 = b_base
    23  	// X13 = b_len (this is already the register layout compare<> reads)
    24  	JMP	compare<>(SB) // tail jump: compare<> sets X10 and executes the RET
    25  
    26  // compare<> orders the byte sequences a and b lexicographically.
    27  //
    28  // On entry:
    29  // X10 points to start of a
    30  // X11 length of a
    31  // X12 points to start of b
    32  // X13 length of b
    33  //
    34  // On exit:
    35  // X10 holds -1 if a < b, 0 if a == b, +1 if a > b
    36  //
    37  // The result is only ever returned in X10 (regabi); this code never
    38  // stores it through a pointer in X14.
    39  //
    40  // Registers written: X5-X10, X12, X15-X19, X21-X24.
    41  TEXT compare<>(SB),NOSPLIT|NOFRAME,$0
    42  	// Identical start addresses: the common prefix cannot differ, so
    43  	// only the lengths decide the ordering.
    44  	BEQ	X10, X12, cmp_len
    45  
    46  	MOV	X11, X5
    47  	BGE	X13, X5, use_a_len // X5 = min(len(a), len(b))
    48  	MOV	X13, X5
    49  use_a_len:
    50  	BEQZ	X5, cmp_len
    51  
    52  	MOV	$32, X6
    53  	BLT	X5, X6, loop4_check
    54  
    55  	// Check alignment - if alignment differs we have to do one byte at a time.
    56  	AND	$7, X10, X7
    57  	AND	$7, X12, X8
    58  	BNE	X7, X8, loop4_check
    59  	BEQZ	X7, loop32_check
    60  
    61  	// Check one byte at a time until we reach 8 byte alignment.
    62  	// X7 is the offset past the previous 8 byte boundary (1..7), so the
    63  	// distance to the next boundary is 8 - X7 bytes. X5 >= 32 here, so
    64  	// the remaining count stays positive.
    65  	SUB	X7, X0, X7
    66  	ADD	$8, X7, X7
    67  	SUB	X7, X5, X5
    68  align:
    69  	ADD	$-1, X7
    70  	MOVBU	0(X10), X8
    71  	MOVBU	0(X12), X9
    72  	BNE	X8, X9, cmp
    73  	ADD	$1, X10
    74  	ADD	$1, X12
    75  	BNEZ	X7, align
    76  
    77  loop32_check:
    78  	MOV	$32, X7
    79  	BLT	X5, X7, loop16_check
    80  loop32:
    81  	// 32 bytes per iteration, as four 8 byte words from each input.
    82  	MOV	0(X10), X15
    83  	MOV	0(X12), X16
    84  	MOV	8(X10), X17
    85  	MOV	8(X12), X18
    86  	BEQ	X15, X16, loop32a
    87  	JMP	cmp8a
    88  loop32a:
    89  	BEQ	X17, X18, loop32b
    90  	JMP	cmp8b
    91  loop32b:
    92  	MOV	16(X10), X15
    93  	MOV	16(X12), X16
    94  	MOV	24(X10), X17
    95  	MOV	24(X12), X18
    96  	BEQ	X15, X16, loop32c
    97  	JMP	cmp8a
    98  loop32c:
    99  	BEQ	X17, X18, loop32d
   100  	JMP	cmp8b
   101  loop32d:
   102  	ADD	$32, X10
   103  	ADD	$32, X12
   104  	ADD	$-32, X5
   105  	BGE	X5, X7, loop32
   106  	BEQZ	X5, cmp_len
   107  
   108  loop16_check:
   109  	MOV	$16, X6
   110  	BLT	X5, X6, loop4_check
   111  loop16:
   112  	// 16 bytes per iteration, as two 8 byte words from each input.
   113  	MOV	0(X10), X15
   114  	MOV	0(X12), X16
   115  	MOV	8(X10), X17
   116  	MOV	8(X12), X18
   117  	BEQ	X15, X16, loop16a
   118  	JMP	cmp8a
   119  loop16a:
   120  	BEQ	X17, X18, loop16b
   121  	JMP	cmp8b
   122  loop16b:
   123  	ADD	$16, X10
   124  	ADD	$16, X12
   125  	ADD	$-16, X5
   126  	BGE	X5, X6, loop16
   127  	BEQZ	X5, cmp_len
   128  
   129  loop4_check:
   130  	MOV	$4, X6
   131  	BLT	X5, X6, loop1
   132  loop4:
   133  	// 4 bytes per iteration using byte loads, so any alignment is fine.
   134  	MOVBU	0(X10), X8
   135  	MOVBU	0(X12), X9
   136  	MOVBU	1(X10), X15
   137  	MOVBU	1(X12), X16
   138  	BEQ	X8, X9, loop4a
   139  	SLTU	X9, X8, X5
   140  	SLTU	X8, X9, X6
   141  	JMP	cmp_ret
   142  loop4a:
   143  	BEQ	X15, X16, loop4b
   144  	SLTU	X16, X15, X5
   145  	SLTU	X15, X16, X6
   146  	JMP	cmp_ret
   147  loop4b:
   148  	MOVBU	2(X10), X21
   149  	MOVBU	2(X12), X22
   150  	MOVBU	3(X10), X23
   151  	MOVBU	3(X12), X24
   152  	BEQ	X21, X22, loop4c
   153  	SLTU	X22, X21, X5
   154  	SLTU	X21, X22, X6
   155  	JMP	cmp_ret
   156  loop4c:
   157  	BEQ	X23, X24, loop4d
   158  	SLTU	X24, X23, X5
   159  	SLTU	X23, X24, X6
   160  	JMP	cmp_ret
   161  loop4d:
   162  	ADD	$4, X10
   163  	ADD	$4, X12
   164  	ADD	$-4, X5
   165  	BGE	X5, X6, loop4
   166  
   167  loop1:
   168  	BEQZ	X5, cmp_len
   169  	MOVBU	0(X10), X8
   170  	MOVBU	0(X12), X9
   171  	BNE	X8, X9, cmp
   172  	ADD	$1, X10
   173  	ADD	$1, X12
   174  	ADD	$-1, X5
   175  	JMP	loop1
   176  
   177  	// Compare 8 bytes of memory in X15/X16 that are known to differ.
   178  	// The mask walks up from the least significant byte, which is the
   179  	// lowest address on little-endian riscv64, so the first mismatch
   180  	// found is the first differing byte in memory order.
   181  cmp8a:
   182  	MOV	$0xff, X19
   183  cmp8a_loop:
   184  	AND	X15, X19, X8
   185  	AND	X16, X19, X9
   186  	BNE	X8, X9, cmp
   187  	SLLI	$8, X19
   188  	JMP	cmp8a_loop
   189  
   190  	// Compare 8 bytes of memory in X17/X18 that are known to differ.
   191  cmp8b:
   192  	MOV	$0xff, X19
   193  cmp8b_loop:
   194  	AND	X17, X19, X8
   195  	AND	X18, X19, X9
   196  	BNE	X8, X9, cmp
   197  	SLLI	$8, X19
   198  	JMP	cmp8b_loop
   199  
   200  cmp_len:
   201  	// All compared bytes matched: order by length instead.
   202  	MOV	X11, X8
   203  	MOV	X13, X9
   204  cmp:
   205  	// X5 = (X8 < X9), X6 = (X9 < X8), unsigned.
   206  	SLTU	X9, X8, X5
   207  	SLTU	X8, X9, X6
   208  cmp_ret:
   209  	SUB	X5, X6, X10 // X10 = X6 - X5, i.e. -1, 0 or +1
   210  	RET
   187  	RET