github.com/hikaru7719/go@v0.0.0-20181025140707-c8b2ac68906a/src/internal/bytealg/compare_386.s (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
     8  TEXT ·Compare(SB),NOSPLIT,$0-28	// entry stub: unpack a and b into cmpbody's input registers; no local frame, 28 bytes of args+result
     9  	MOVL	b_len+16(FP), DX	// DX = blen
    10  	MOVL	a_len+4(FP), BX	// BX = alen
    11  	MOVL	b_base+12(FP), DI	// DI = b (data pointer)
    12  	MOVL	a_base+0(FP), SI	// SI = a (data pointer)
    13  	LEAL	ret+24(FP), AX	// AX = address of the result word that cmpbody fills in
    14  	JMP	cmpbody<>(SB)	// jump rather than call: cmpbody's RET returns to our caller
    15  
    16  TEXT bytes·Compare(SB),NOSPLIT,$0-28	// defines the symbol bytes·Compare with the same argument offsets as ·Compare
    17  	FUNCDATA $0, ·Compare·args_stackmap(SB)	// attach ·Compare's argument stack map to this symbol
    18  	MOVL	b_len+16(FP), DX	// DX = blen
    19  	MOVL	a_len+4(FP), BX	// BX = alen
    20  	MOVL	b_base+12(FP), DI	// DI = b (data pointer)
    21  	MOVL	a_base+0(FP), SI	// SI = a (data pointer)
    22  	LEAL	ret+24(FP), AX	// AX = address of the result word that cmpbody fills in
    23  	JMP	cmpbody<>(SB)	// jump rather than call: cmpbody's RET returns to our caller
    24  
    25  TEXT runtime·cmpstring(SB),NOSPLIT,$0-20	// entry stub for two (base, len) pairs: b starts at offset 8, result word at offset 16
    26  	MOVL	b_len+12(FP), DX	// DX = blen
    27  	MOVL	a_len+4(FP), BX	// BX = alen
    28  	MOVL	b_base+8(FP), DI	// DI = b (data pointer)
    29  	MOVL	a_base+0(FP), SI	// SI = a (data pointer)
    30  	LEAL	ret+16(FP), AX	// AX = address of the result word that cmpbody fills in
    31  	JMP	cmpbody<>(SB)	// jump rather than call: cmpbody's RET returns to our caller
    32  
    33  // cmpbody compares the first min(alen, blen) bytes of a and b, falling back to the lengths on a tie. Input:
    34  //   SI = a (pointer to a's first byte)
    35  //   DI = b (pointer to b's first byte)
    36  //   BX = alen (byte count of a)
    37  //   DX = blen (byte count of b)
    38  //   AX = address of return word (set to 1/0/-1: 1 if a > b, 0 if equal, -1 if a < b)
    39  TEXT cmpbody<>(SB),NOSPLIT,$0-0	// overwrites BX, CX, DX, BP, SI, DI and flags; the SSE2 path also uses X0, X1
    40  	MOVL	DX, BP	// BP = blen for now
    41  	SUBL	BX, DX // DX = blen-alen
    42  	JLE	2(PC)	// blen <= alen: BP already holds the minimum, skip the next instruction
    43  	MOVL	BX, BP // BP = min(alen, blen)
    44  	CMPL	SI, DI	// do a and b start at the same address?
    45  	JEQ	allsame	// yes: the common bytes are trivially equal, only the lengths matter
    46  	CMPL	BP, $4
    47  	JB	small	// fewer than 4 bytes to compare
    48  	CMPB	internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1	// is the HasSSE2 flag set?
    49  	JNE	mediumloop	// no: use the 4-byte loop instead of the 16-byte one
    50  largeloop:	// compares 16 bytes per iteration; BP = bytes still to compare
    51  	CMPL	BP, $16
    52  	JB	mediumloop	// fewer than 16 bytes left
    53  	MOVOU	(SI), X0	// unaligned 16-byte load from a
    54  	MOVOU	(DI), X1	// unaligned 16-byte load from b
    55  	PCMPEQB X0, X1	// each byte of X1 becomes 0xff where a and b match, 0x00 where they differ
    56  	PMOVMSKB X1, BX	// BX bit i = 1 if byte i matched
    57  	XORL	$0xffff, BX	// convert EQ to NE
    58  	JNE	diff16	// branch if at least one byte is not equal
    59  	ADDL	$16, SI	// step past the 16 equal bytes of a
    60  	ADDL	$16, DI	// ... and of b
    61  	SUBL	$16, BP	// 16 fewer bytes to compare
    62  	JMP	largeloop
    63  
    64  diff16:	// BX has a 1 bit for every mismatching byte of the current 16-byte chunk
    65  	BSFL	BX, BX	// index of first byte that differs
    66  	XORL	DX, DX	// clear DX so SETHI leaves a clean 0/1 (the length difference is no longer needed)
    67  	MOVB	(SI)(BX*1), CX	// CX = a's first differing byte
    68  	CMPB	CX, (DI)(BX*1)	// compare it with b's byte at the same index
    69  	SETHI	DX	// DX = 1 if a's byte is larger (unsigned), else 0
    70  	LEAL	-1(DX*2), DX	// convert 1/0 to +1/-1
    71  	MOVL	DX, (AX)	// store the result
    72  	RET
    73  
    74  mediumloop:	// compares 4 bytes per iteration; BP = bytes still to compare
    75  	CMPL	BP, $4
    76  	JBE	_0through4	// 4 or fewer bytes left: finish with one final load
    77  	MOVL	(SI), BX	// BX = next 4 bytes of a
    78  	MOVL	(DI), CX	// CX = next 4 bytes of b
    79  	CMPL	BX, CX
    80  	JNE	diff4	// the words differ: work out which side is larger
    81  	ADDL	$4, SI	// step past the 4 equal bytes of a
    82  	ADDL	$4, DI	// ... and of b
    83  	SUBL	$4, BP	// 4 fewer bytes to compare
    84  	JMP	mediumloop
    85  
    86  _0through4:	// BP is between 0 and 4 here
    87  	MOVL	-4(SI)(BP*1), BX	// the 4 bytes of a that end at SI+BP; may overlap bytes already compared
    88  	MOVL	-4(DI)(BP*1), CX	// the matching 4 bytes of b
    89  	CMPL	BX, CX
    90  	JEQ	allsame	// no difference in the common bytes
    91  
    92  diff4:	// BX and CX hold differing 4-byte words from a and b
    93  	BSWAPL	BX	// reverse order of bytes so the lowest-addressed byte becomes the most significant
    94  	BSWAPL	CX
    95  	XORL	BX, CX	// find bit differences
    96  	BSRL	CX, CX	// index of highest bit difference
    97  	SHRL	CX, BX	// move a's bit to bottom
    98  	ANDL	$1, BX	// mask bit (1 means a has the larger byte, since b's bit there is 0)
    99  	LEAL	-1(BX*2), BX // 1/0 => +1/-1
   100  	MOVL	BX, (AX)	// store the result
   101  	RET
   102  
   103  	// 0-3 bytes in common
   104  small:
   105  	LEAL	(BP*8), CX	// CX = number of bits to compare (8*BP); LEAL leaves the flags alone
   106  	NEGL	CX	// CX = -8*BP; shifts use only the low 5 bits of CX, so it acts as 32-8*BP
   107  	JEQ	allsame	// NEGL produced zero, i.e. BP == 0: nothing to compare
   108  
   109  	// load a's bytes into SI
   110  	CMPB	SI, $0xfc	// is the low byte of the address above 0xfc?
   111  	JA	si_high	// yes: 4 bytes starting here would run past a 256-byte boundary (presumably a page-crossing guard)
   112  	MOVL	(SI), SI	// read 4 bytes starting at a; bytes past BP are discarded below
   113  	JMP	si_finish
   114  si_high:
   115  	MOVL	-4(SI)(BP*1), SI	// read the 4 bytes that end at a+BP instead
   116  	SHRL	CX, SI	// move the BP wanted bytes down to the low end, as the forward load would have them
   117  si_finish:
   118  	SHLL	CX, SI	// push the BP wanted bytes to the top, zero-filling below; drops anything past the common length
   119  
   120  	// same for b's bytes into DI
   121  	CMPB	DI, $0xfc	// is the low byte of the address above 0xfc?
   122  	JA	di_high	// yes: use the backward-anchored load
   123  	MOVL	(DI), DI	// read 4 bytes starting at b
   124  	JMP	di_finish
   125  di_high:
   126  	MOVL	-4(DI)(BP*1), DI	// read the 4 bytes that end at b+BP instead
   127  	SHRL	CX, DI	// move the BP wanted bytes down to the low end
   128  di_finish:
   129  	SHLL	CX, DI	// push the BP wanted bytes to the top, zero-filling below
   130  
   131  	BSWAPL	SI	// reverse order of bytes so the lowest-addressed byte becomes the most significant
   132  	BSWAPL	DI
   133  	XORL	SI, DI	// find bit differences
   134  	JEQ	allsame	// the common bytes match
   135  	BSRL	DI, CX	// index of highest bit difference
   136  	SHRL	CX, SI	// move a's bit to bottom
   137  	ANDL	$1, SI	// mask bit (1 means a has the larger byte)
   138  	LEAL	-1(SI*2), BX // 1/0 => +1/-1
   139  	MOVL	BX, (AX)	// store the result
   140  	RET
   141  
   142  	// all the bytes in common are the same, so we just need
   143  	// to compare the lengths. DX still holds blen-alen here.
   144  allsame:
   145  	XORL	BX, BX	// clear first: SETcc writes only the low byte, and XORL would clobber TESTL's flags
   146  	XORL	CX, CX
   147  	TESTL	DX, DX	// set flags from DX = blen-alen
   148  	SETLT	BX	// 1 if alen > blen
   149  	SETEQ	CX	// 1 if alen == blen
   150  	LEAL	-1(CX)(BX*2), BX	// 1,0,-1 result
   151  	MOVL	BX, (AX)	// store the result
   152  	RET