github.com/hikaru7719/go@v0.0.0-20181025140707-c8b2ac68906a/src/internal/bytealg/compare_amd64p32.s (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
     8  TEXT ·Compare(SB),NOSPLIT,$0-28	// thin wrapper: marshal a/b into cmpbody's registers
     9  	MOVL	a_base+0(FP), SI	// SI = a (base pointer)
    10  	MOVL	b_base+12(FP), DI	// DI = b (base pointer)
    11  	MOVL	a_len+4(FP), BX	// BX = alen
    12  	MOVL	b_len+16(FP), DX	// DX = blen
    13  	CALL	cmpbody<>(SB)	// leaves 1/0/-1 in AX
    14  	MOVL	AX, ret+24(FP)	// store the comparison result in the return slot
    15  	RET
    16  
    17  TEXT bytes·Compare(SB),NOSPLIT,$0-28	// same body as ·Compare, under the bytes package symbol
    18  	FUNCDATA $0, ·Compare·args_stackmap(SB)	// funcdata slot 0 points at ·Compare's args_stackmap
    19  	MOVL	a_base+0(FP), SI	// SI = a (base pointer)
    20  	MOVL	b_base+12(FP), DI	// DI = b (base pointer)
    21  	MOVL	a_len+4(FP), BX	// BX = alen
    22  	MOVL	b_len+16(FP), DX	// DX = blen
    23  	CALL	cmpbody<>(SB)	// leaves 1/0/-1 in AX
    24  	MOVL	AX, ret+24(FP)	// store the comparison result in the return slot
    25  	RET
    26  
    27  TEXT runtime·cmpstring(SB),NOSPLIT,$0-20	// thin wrapper: marshal a/b into cmpbody's registers
    28  	MOVL	a_base+0(FP), SI	// SI = a (base pointer)
    29  	MOVL	b_base+8(FP), DI	// DI = b (base pointer)
    30  	MOVL	a_len+4(FP), BX	// BX = alen
    31  	MOVL	b_len+12(FP), DX	// DX = blen
    32  	CALL	cmpbody<>(SB)	// leaves 1/0/-1 in AX
    33  	MOVL	AX, ret+16(FP)	// store the comparison result in the return slot
    34  	RET
    35  
    36  // cmpbody compares the byte sequences a and b. input:
    37  //   SI = a (address of first byte)
    38  //   DI = b (address of first byte)
    39  //   BX = alen
    40  //   DX = blen
    41  // output (scratch: R8, CX, X0, X1; SI, DI and BX may be overwritten):
    42  //   AX = 1/0/-1 (a > b / equal / a < b; an identical common prefix is ordered by length)
    43  TEXT cmpbody<>(SB),NOSPLIT,$0-0
    44  	CMPQ	SI, DI	// same start address: the common prefix cannot differ
    45  	JEQ	allsame
    46  	CMPQ	BX, DX
    47  	MOVQ	DX, R8	// MOVQ leaves the flags from the CMPQ above intact for CMOVQLT
    48  	CMOVQLT	BX, R8 // R8 = min(alen, blen) = # of bytes to compare
    49  	CMPQ	R8, $8
    50  	JB	small	// fewer than 8 bytes in common
    51  
    52  loop:	// compare 16 bytes per iteration while more than 16 remain
    53  	CMPQ	R8, $16
    54  	JBE	_0through16
    55  	MOVOU	(SI), X0	// unaligned 16-byte loads of a and b
    56  	MOVOU	(DI), X1
    57  	PCMPEQB X0, X1	// X1 = per-byte equality mask
    58  	PMOVMSKB X1, AX	// AX bit i is set when byte i is equal
    59  	XORQ	$0xffff, AX	// convert EQ to NE
    60  	JNE	diff16	// branch if at least one byte is not equal
    61  	ADDQ	$16, SI
    62  	ADDQ	$16, DI
    63  	SUBQ	$16, R8
    64  	JMP	loop
    65  
    66  	// AX = bit mask of differences
    67  diff16:
    68  	BSFQ	AX, BX	// index of first byte that differs
    69  	XORQ	AX, AX	// clear AX before SETHI fills in 0/1
    70  	ADDQ	BX, SI	// SI -> first differing byte of a
    71  	MOVB	(SI), CX
    72  	ADDQ	BX, DI	// DI -> first differing byte of b
    73  	CMPB	CX, (DI)
    74  	SETHI	AX	// 1 if a's byte is above b's (unsigned compare)
    75  	LEAQ	-1(AX*2), AX	// convert 1/0 to +1/-1
    76  	RET
    77  
    78  	// 0 through 16 bytes left, alen>=8, blen>=8
    79  _0through16:
    80  	CMPQ	R8, $8
    81  	JBE	_0through8	// 8 or fewer left: one load ending at the tail covers them
    82  	MOVQ	(SI), AX	// compare the first 8 of the remaining bytes
    83  	MOVQ	(DI), CX
    84  	CMPQ	AX, CX
    85  	JNE	diff8
    86  _0through8:
    87  	ADDQ	R8, SI	// SI, DI -> one past the last byte to compare
    88  	ADDQ	R8, DI
    89  	MOVQ	-8(SI), AX	// final 8 bytes; may overlap bytes already found equal
    90  	MOVQ	-8(DI), CX
    91  	CMPQ	AX, CX
    92  	JEQ	allsame
    93  
    94  	// AX and CX contain parts of a and b that differ.
    95  diff8:
    96  	BSWAPQ	AX	// reverse order of bytes
    97  	BSWAPQ	CX
    98  	XORQ	AX, CX	// CX = bits in which a and b differ
    99  	BSRQ	CX, CX	// index of highest bit difference
   100  	SHRQ	CX, AX	// move a's bit to bottom
   101  	ANDQ	$1, AX	// mask bit
   102  	LEAQ	-1(AX*2), AX // 1/0 => +1/-1
   103  	RET
   104  
   105  	// 0-7 bytes in common
   106  small:
   107  	LEAQ	(R8*8), CX	// bytes left -> bits left
   108  	NEGQ	CX		//  - bits left (== 64 - bits left mod 64); result is zero when R8 == 0
   109  	JEQ	allsame
   110  
   111  	// load bytes of a into high bytes of SI
   112  	CMPB	SI, $0xf8	// tests the low byte of the address; presumably guards against an 8-byte load running past a page end - confirm
   113  	JA	si_high
   114  	MOVQ	(SI), SI	// load 8 bytes starting at a; wanted bytes land in the low end
   115  	JMP	si_finish
   116  si_high:
   117  	ADDQ	R8, SI
   118  	MOVQ	-8(SI), SI	// load the 8 bytes ending at a's last common byte instead
   119  	SHRQ	CX, SI	// bring the wanted bytes down to the low end
   120  si_finish:
   121  	SHLQ	CX, SI	// push the wanted bytes to the high end, zeroing the rest
   122  
   123  	// load bytes of b into high bytes of DI
   124  	CMPB	DI, $0xf8	// same low-byte address test as for a above
   125  	JA	di_high
   126  	MOVQ	(DI), DI	// load 8 bytes starting at b
   127  	JMP	di_finish
   128  di_high:
   129  	ADDQ	R8, DI
   130  	MOVQ	-8(DI), DI	// load the 8 bytes ending at b's last common byte instead
   131  	SHRQ	CX, DI	// bring the wanted bytes down to the low end
   132  di_finish:
   133  	SHLQ	CX, DI	// push the wanted bytes to the high end, zeroing the rest
   134  
   135  	BSWAPQ	SI	// reverse order of bytes
   136  	BSWAPQ	DI
   137  	XORQ	SI, DI	// find bit differences
   138  	JEQ	allsame
   139  	BSRQ	DI, CX	// index of highest bit difference
   140  	SHRQ	CX, SI	// move a's bit to bottom
   141  	ANDQ	$1, SI	// mask bit
   142  	LEAQ	-1(SI*2), AX // 1/0 => +1/-1
   143  	RET
   144  
   145  allsame:	// compared bytes are identical: order by length alone
   146  	XORQ	AX, AX	// clear both registers before the SETcc instructions fill in 0/1
   147  	XORQ	CX, CX
   148  	CMPQ	BX, DX
   149  	SETGT	AX	// 1 if alen > blen
   150  	SETEQ	CX	// 1 if alen == blen
   151  	LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
   152  	RET