github.com/icodeface/tls@v0.0.0-20230910023335-34df9250cd12/internal/bytealg/compare_amd64p32.s (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
     8  TEXT ·Compare(SB),NOSPLIT,$0-28	// a starts at 0(FP), b at 12(FP), result goes to 24(FP); presumably Compare(a, b []byte) int - confirm against the Go declaration
     9  	MOVL	b_len+16(FP), DX	// DX = length of b (a 32-bit register write also zeroes the upper half, so the 64-bit compares in cmpbody see a clean value)
    10  	MOVL	b_base+12(FP), DI	// DI = address of b's first byte
    11  	MOVL	a_len+4(FP), BX	// BX = length of a
    12  	MOVL	a_base+0(FP), SI	// SI = address of a's first byte
    13  	CALL	cmpbody<>(SB)	// register-based helper: takes SI/DI/BX/DX, leaves 1/0/-1 in AX
    14  	MOVL	AX, ret+24(FP)	// hand the helper's verdict back as the return value
    15  	RET
    16  
    17  TEXT runtime·cmpstring(SB),NOSPLIT,$0-20	// a starts at 0(FP), b at 8(FP), result goes to 16(FP); presumably cmpstring(a, b string) int - confirm against the Go declaration
    18  	MOVL	b_len+12(FP), DX	// DX = length of b (a 32-bit register write also zeroes the upper half, so the 64-bit compares in cmpbody see a clean value)
    19  	MOVL	b_base+8(FP), DI	// DI = address of b's first byte
    20  	MOVL	a_len+4(FP), BX	// BX = length of a
    21  	MOVL	a_base+0(FP), SI	// SI = address of a's first byte
    22  	CALL	cmpbody<>(SB)	// register-based helper: takes SI/DI/BX/DX, leaves 1/0/-1 in AX
    23  	MOVL	AX, ret+16(FP)	// hand the helper's verdict back as the return value
    24  	RET
    25  
    26  // input (cmpbody compares a[0:alen] with b[0:blen]; both entry points in this file call it):
    27  //   SI = a (address of the first byte)
    28  //   DI = b (address of the first byte)
    29  //   BX = alen (length in bytes)
    30  //   DX = blen (length in bytes)
    31  // output (BX, CX, SI, DI, R8, X0, X1 and the flags may be overwritten on the way):
    32  //   AX = 1/0/-1 for a > b / a == b / a < b; bytes compare as unsigned, and if the first min(alen, blen) bytes match the lengths decide
    33  TEXT cmpbody<>(SB),NOSPLIT,$0-0
    34  	CMPQ	SI, DI
    35  	JEQ	allsame	// same start address: the contents cannot differ, only the lengths matter
    36  	CMPQ	BX, DX	// flags consumed by CMOVQLT below (the MOVQ in between leaves them alone)
    37  	MOVQ	DX, R8
    38  	CMOVQLT	BX, R8 // R8 = min(alen, blen) = # of bytes to compare
    39  	CMPQ	R8, $8
    40  	JB	small	// fewer than 8 bytes: no 8-byte load fits entirely inside the data
    41  
    42  loop:	// compare 16 bytes per iteration while more than 16 remain
    43  	CMPQ	R8, $16
    44  	JBE	_0through16
    45  	MOVOU	(SI), X0	// unaligned 16-byte loads of a and b
    46  	MOVOU	(DI), X1
    47  	PCMPEQB X0, X1	// each byte of X1 becomes 0xff where a and b agree, 0x00 where they differ
    48  	PMOVMSKB X1, AX	// AX bit i = 1 if byte i is equal
    49  	XORQ	$0xffff, AX	// convert EQ to NE
    50  	JNE	diff16	// branch if at least one byte is not equal
    51  	ADDQ	$16, SI	// all 16 equal: advance both pointers and shrink the remaining count
    52  	ADDQ	$16, DI
    53  	SUBQ	$16, R8
    54  	JMP	loop
    55  
    56  	// AX = bit mask of differences
    57  diff16:
    58  	BSFQ	AX, BX	// index of first byte that differs
    59  	XORQ	AX, AX	// clear AX so that SETHI below yields a clean 0 or 1
    60  	ADDQ	BX, SI
    61  	MOVB	(SI), CX	// CX = a's byte at the first difference
    62  	ADDQ	BX, DI
    63  	CMPB	CX, (DI)	// against b's byte at the same index
    64  	SETHI	AX	// AX = 1 if a's byte is higher (unsigned), else 0
    65  	LEAQ	-1(AX*2), AX	// convert 1/0 to +1/-1
    66  	RET
    67  
    68  	// 0 through 16 bytes left, alen>=8, blen>=8 (so the -8(SI)/-8(DI) loads below may reach back into bytes already compared without leaving the inputs)
    69  _0through16:
    70  	CMPQ	R8, $8
    71  	JBE	_0through8	// at most 8 left: a single load of the last 8 bytes covers them
    72  	MOVQ	(SI), AX	// more than 8 left: compare the next 8 bytes first
    73  	MOVQ	(DI), CX
    74  	CMPQ	AX, CX
    75  	JNE	diff8
    76  _0through8:
    77  	ADDQ	R8, SI	// SI/DI now point just past the last byte to compare
    78  	ADDQ	R8, DI
    79  	MOVQ	-8(SI), AX	// final 8 bytes of the compared range
    80  	MOVQ	-8(DI), CX
    81  	CMPQ	AX, CX
    82  	JEQ	allsame
    83  
    84  	// AX and CX contain parts of a and b that differ.
    85  diff8:
    86  	BSWAPQ	AX	// reverse order of bytes
    87  	BSWAPQ	CX	// after the swap the byte from the lowest address is the most significant one
    88  	XORQ	AX, CX	// CX = bits in which a and b differ (nonzero on every path that reaches here)
    89  	BSRQ	CX, CX	// index of highest bit difference
    90  	SHRQ	CX, AX	// move a's bit to bottom
    91  	ANDQ	$1, AX	// mask bit
    92  	LEAQ	-1(AX*2), AX // 1/0 => +1/-1
    93  	RET
    94  
    95  	// 0-7 bytes in common, i.e. R8 < 8 bytes to compare
    96  small:
    97  	LEAQ	(R8*8), CX	// bytes left -> bits left
    98  	NEGQ	CX		//  - bits left (== 64 - bits left mod 64)
    99  	JEQ	allsame	// NEGQ produced zero, so R8 == 0: nothing to compare, the lengths decide
   100  
   101  	// load bytes of a into high bytes of SI
   102  	CMPB	SI, $0xf8	// low address byte above 0xf8 means an 8-byte load at (SI) would cross a 256-byte boundary
   103  	JA	si_high	// NOTE(review): presumably guards against reading into an unmapped page - confirm
   104  	MOVQ	(SI), SI	// load 8 bytes starting at a; those beyond R8 are shifted out at si_finish
   105  	JMP	si_finish
   106  si_high:
   107  	ADDQ	R8, SI
   108  	MOVQ	-8(SI), SI	// load the 8 bytes that end at a+R8 instead; a's bytes are the high ones
   109  	SHRQ	CX, SI	// bring a's bytes down to the low end so si_finish treats both paths alike
   110  si_finish:
   111  	SHLQ	CX, SI	// push a's R8 bytes to the high end, dropping everything else
   112  
   113  	// load bytes of b into high bytes of DI
   114  	CMPB	DI, $0xf8	// same boundary test as for SI above
   115  	JA	di_high
   116  	MOVQ	(DI), DI	// load 8 bytes starting at b; those beyond R8 are shifted out at di_finish
   117  	JMP	di_finish
   118  di_high:
   119  	ADDQ	R8, DI
   120  	MOVQ	-8(DI), DI	// load the 8 bytes that end at b+R8 instead; b's bytes are the high ones
   121  	SHRQ	CX, DI	// bring b's bytes down to the low end so di_finish treats both paths alike
   122  di_finish:
   123  	SHLQ	CX, DI	// push b's R8 bytes to the high end, dropping everything else
   124  
   125  	BSWAPQ	SI	// reverse order of bytes
   126  	BSWAPQ	DI
   127  	XORQ	SI, DI	// find bit differences
   128  	JEQ	allsame	// the compared bytes are identical: the lengths decide
   129  	BSRQ	DI, CX	// index of highest bit difference
   130  	SHRQ	CX, SI	// move a's bit to bottom
   131  	ANDQ	$1, SI	// mask bit
   132  	LEAQ	-1(SI*2), AX // 1/0 => +1/-1
   133  	RET
   134  
   135  allsame:	// every compared byte matched (or a and b start at the same address): order by length
   136  	XORQ	AX, AX	// clear both registers before the compare, since XORQ would overwrite its flags
   137  	XORQ	CX, CX
   138  	CMPQ	BX, DX
   139  	SETGT	AX	// 1 if alen > blen
   140  	SETEQ	CX	// 1 if alen == blen
   141  	LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
   142  	RET
   141  	LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
   142  	RET