github.com/twelsh-aw/go/src@v0.0.0-20230516233729-a56fe86a7c81/internal/bytealg/compare_riscv64.s (about)

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
     8  TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56	// func Compare(a []byte, b []byte) int
     9  	// X10 = a_base
    10  	// X11 = a_len
    11  	// X12 = a_cap (unused)
    12  	// X13 = b_base (want in X12)
    13  	// X14 = b_len (want in X13)
    14  	// X15 = b_cap (unused)
    15  	MOV	X13, X12	// move b_base into the slot compare<> expects
    16  	MOV	X14, X13	// move b_len; must follow the X13 copy above
    17  	JMP	compare<>(SB)	// tail call; compare<> returns the result in X10
    18  
    19  TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40	// func cmpstring(a string, b string) int
    20  	// X10 = a_base
    21  	// X11 = a_len
    22  	// X12 = b_base
    23  	// X13 = b_len
    24  	JMP	compare<>(SB)	// arguments are already in the registers compare<> expects; tail call, result in X10
    25  
    26  // On entry:
    27  // X10 points to start of a
    28  // X11 length of a
    29  // X12 points to start of b
    30  // X13 length of b
    31  // On exit:
    32  // X10 holds the result: -1 if a < b, 0 if a == b, +1 if a > b
    33  TEXT compare<>(SB),NOSPLIT|NOFRAME,$0
    34  	BEQ	X10, X12, cmp_len	// same base pointer - only the lengths can differ
    35  
    36  	MOV	X11, X5
    37  	BGE	X13, X5, use_a_len // X5 = min(len(a), len(b))
    38  	MOV	X13, X5
    39  use_a_len:
    40  	BEQZ	X5, cmp_len
    41  
    42  	MOV	$32, X6
    43  	BLT	X5, X6, check8_unaligned
    44  
    45  	// Check alignment - if alignment differs we have to do one byte at a time.
    46  	AND	$7, X10, X7
    47  	AND	$7, X12, X8
    48  	BNE	X7, X8, check8_unaligned
    49  	BEQZ	X7, compare32
    50  
    51  	// Compare one byte at a time until X10 (and therefore X12) is 8 byte aligned.
    52  align:
    53  	MOVBU	0(X10), X8
    54  	MOVBU	0(X12), X9
    55  	BNE	X8, X9, cmp
    56  	ADD	$1, X10
    57  	ADD	$1, X12
    58  	ADD	$-1, X5	// at most 7 bytes consumed here, so X5 >= 25 on exit
    59  	AND	$7, X10, X7
    60  	BNEZ	X7, align
    61  
    62  check32:
    63  	MOV	$32, X6
    64  	BLT	X5, X6, compare16
    65  compare32:
    66  	MOV	0(X10), X15
    67  	MOV	0(X12), X16
    68  	MOV	8(X10), X17
    69  	MOV	8(X12), X18
    70  	BNE	X15, X16, cmp8a
    71  	BNE	X17, X18, cmp8b
    72  	MOV	16(X10), X15
    73  	MOV	16(X12), X16
    74  	MOV	24(X10), X17
    75  	MOV	24(X12), X18
    76  	BNE	X15, X16, cmp8a
    77  	BNE	X17, X18, cmp8b
    78  	ADD	$32, X10
    79  	ADD	$32, X12
    80  	ADD	$-32, X5
    81  	BGE	X5, X6, compare32
    82  	BEQZ	X5, cmp_len
    83  
    84  check16:
    85  	MOV	$16, X6
    86  	BLT	X5, X6, check8_unaligned
    87  compare16:
    88  	MOV	0(X10), X15
    89  	MOV	0(X12), X16
    90  	MOV	8(X10), X17
    91  	MOV	8(X12), X18
    92  	BNE	X15, X16, cmp8a
    93  	BNE	X17, X18, cmp8b
    94  	ADD	$16, X10
    95  	ADD	$16, X12
    96  	ADD	$-16, X5
    97  	BEQZ	X5, cmp_len
    98  
    99  check8_unaligned:
   100  	MOV	$8, X6
   101  	BLT	X5, X6, check4_unaligned
   102  compare8_unaligned:
   103  	MOVBU	0(X10), X8
   104  	MOVBU	1(X10), X15
   105  	MOVBU	2(X10), X17
   106  	MOVBU	3(X10), X19
   107  	MOVBU	4(X10), X21
   108  	MOVBU	5(X10), X23
   109  	MOVBU	6(X10), X25
   110  	MOVBU	7(X10), X29
   111  	MOVBU	0(X12), X9
   112  	MOVBU	1(X12), X16
   113  	MOVBU	2(X12), X18
   114  	MOVBU	3(X12), X20
   115  	MOVBU	4(X12), X22
   116  	MOVBU	5(X12), X24
   117  	MOVBU	6(X12), X28
   118  	MOVBU	7(X12), X30
   119  	BNE	X8, X9, cmp1a
   120  	BNE	X15, X16, cmp1b
   121  	BNE	X17, X18, cmp1c
   122  	BNE	X19, X20, cmp1d
   123  	BNE	X21, X22, cmp1e
   124  	BNE	X23, X24, cmp1f
   125  	BNE	X25, X28, cmp1g
   126  	BNE	X29, X30, cmp1h
   127  	ADD	$8, X10
   128  	ADD	$8, X12
   129  	ADD	$-8, X5
   130  	BGE	X5, X6, compare8_unaligned
   131  	BEQZ	X5, cmp_len
   132  
   133  check4_unaligned:
   134  	MOV	$4, X6
   135  	BLT	X5, X6, compare1
   136  compare4_unaligned:
   137  	MOVBU	0(X10), X8
   138  	MOVBU	1(X10), X15
   139  	MOVBU	2(X10), X17
   140  	MOVBU	3(X10), X19
   141  	MOVBU	0(X12), X9
   142  	MOVBU	1(X12), X16
   143  	MOVBU	2(X12), X18
   144  	MOVBU	3(X12), X20
   145  	BNE	X8, X9, cmp1a
   146  	BNE	X15, X16, cmp1b
   147  	BNE	X17, X18, cmp1c
   148  	BNE	X19, X20, cmp1d
   149  	ADD	$4, X10
   150  	ADD	$4, X12
   151  	ADD	$-4, X5
   152  	BGE	X5, X6, compare4_unaligned
   153  
   154  compare1:
   155  	BEQZ	X5, cmp_len
   156  	MOVBU	0(X10), X8
   157  	MOVBU	0(X12), X9
   158  	BNE	X8, X9, cmp
   159  	ADD	$1, X10
   160  	ADD	$1, X12
   161  	ADD	$-1, X5
   162  	JMP	compare1
   163  
   164  	// Compare 8 bytes of memory in X15/X16 that are known to differ.
   165  cmp8a:
   166  	MOV	X15, X17
   167  	MOV	X16, X18
   168  
   169  	// Compare 8 bytes of memory in X17/X18 that are known to differ.
   170  cmp8b:
   171  	MOV	$0xff, X19	// byte mask, walked up from the LSB
   172  cmp8_loop:	// little-endian: the LSB holds the lowest-addressed byte
   173  	AND	X17, X19, X8
   174  	AND	X18, X19, X9
   175  	BNE	X8, X9, cmp
   176  	SLLI	$8, X19
   177  	JMP	cmp8_loop
   178  
   179  cmp1a:
   180  	SLTU	X9, X8, X5
   181  	SLTU	X8, X9, X6
   182  	JMP	cmp_ret
   183  cmp1b:
   184  	SLTU	X16, X15, X5
   185  	SLTU	X15, X16, X6
   186  	JMP	cmp_ret
   187  cmp1c:
   188  	SLTU	X18, X17, X5
   189  	SLTU	X17, X18, X6
   190  	JMP	cmp_ret
   191  cmp1d:
   192  	SLTU	X20, X19, X5
   193  	SLTU	X19, X20, X6
   194  	JMP	cmp_ret
   195  cmp1e:
   196  	SLTU	X22, X21, X5
   197  	SLTU	X21, X22, X6
   198  	JMP	cmp_ret
   199  cmp1f:
   200  	SLTU	X24, X23, X5
   201  	SLTU	X23, X24, X6
   202  	JMP	cmp_ret
   203  cmp1g:
   204  	SLTU	X28, X25, X5
   205  	SLTU	X25, X28, X6
   206  	JMP	cmp_ret
   207  cmp1h:
   208  	SLTU	X30, X29, X5
   209  	SLTU	X29, X30, X6
   210  	JMP	cmp_ret
   211  
   212  cmp_len:	// all compared bytes were equal - order is decided by length
   213  	MOV	X11, X8
   214  	MOV	X13, X9
   215  cmp:
   216  	SLTU	X9, X8, X5
   217  	SLTU	X8, X9, X6
   218  cmp_ret:
   219  	SUB	X5, X6, X10	// X10 = X6 - X5, one of -1/0/+1
   220  	RET