github.com/sandwichdev/go-internals@v0.0.0-20210605002614-12311ac6b2c5/bytealg/equal_386.s (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
     8  // memequal(a, b unsafe.Pointer, size uintptr) bool -- result byte is 1 when the size bytes at a and b match, 0 otherwise.
     9  TEXT runtime·memequal(SB),NOSPLIT,$0-13
    10  	MOVL	b+4(FP), DI	// DI = b
    11  	MOVL	a+0(FP), SI	// SI = a
    12  	CMPL	SI, DI
    13  	JEQ	same_ptr	// a == b: equal by definition, no memory needs to be read
    14  	LEAL	ret+12(FP), AX	// AX = address of the bool result slot, written by memeqbody
    15  	MOVL	size+8(FP), BX	// BX = number of bytes to compare
    16  	JMP	memeqbody<>(SB)	// tail jump: memeqbody stores the result and returns to our caller
    17  same_ptr:
    18  	MOVB	$1, ret+12(FP)	// identical pointers: report true
    19  	RET
    20  
    21  // memequal_varlen(a, b unsafe.Pointer) bool -- like memequal, but the byte count is read from the closure pointed to by DX.
    22  TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
    23  	MOVL	b+4(FP), DI	// DI = b
    24  	MOVL	a+0(FP), SI	// SI = a
    25  	CMPL	SI, DI
    26  	JEQ	same_ptr	// a == b: equal by definition, no memory needs to be read
    27  	LEAL	ret+8(FP), AX	// AX = address of the bool result slot, written by memeqbody
    28  	MOVL	4(DX), BX	// BX = size; compiler stores size at offset 4 in the closure
    29  	JMP	memeqbody<>(SB)	// tail jump: memeqbody stores the result and returns to our caller
    30  same_ptr:
    31  	MOVB	$1, ret+8(FP)	// identical pointers: report true
    32  	RET
    33  
    34  // memeqbody compares two byte ranges; it is entered by JMP with register arguments: a (start of first range) in SI
    35  // b (start of second range) in DI
    36  // count (number of bytes to compare) in BX
    37  // address of result byte in AX; 1 is stored there when all count bytes match, 0 otherwise. SI, DI, BX, CX, DX and X0-X7 are overwritten.
    38  TEXT memeqbody<>(SB),NOSPLIT,$0-0
    39  	CMPL	BX, $4
    40  	JB	small	// fewer than 4 bytes: use the single masked-load path at small
    41  
    42  	// 64 bytes at a time using xmm registers
    43  hugeloop:
    44  	CMPL	BX, $64
    45  	JB	bigloop	// fewer than 64 bytes left: continue 4 bytes at a time
    46  	CMPB	internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
    47  	JNE	bigloop	// HasSSE2 flag byte is not 1: skip the xmm path entirely
    48  	MOVOU	(SI), X0	// unaligned 16-byte loads: four chunks from a (X0,X2,X4,X6) ...
    49  	MOVOU	(DI), X1	// ... and the matching four chunks from b (X1,X3,X5,X7)
    50  	MOVOU	16(SI), X2
    51  	MOVOU	16(DI), X3
    52  	MOVOU	32(SI), X4
    53  	MOVOU	32(DI), X5
    54  	MOVOU	48(SI), X6
    55  	MOVOU	48(DI), X7
    56  	PCMPEQB	X1, X0	// each result byte becomes 0xff where a and b agree, 0x00 where they differ
    57  	PCMPEQB	X3, X2
    58  	PCMPEQB	X5, X4
    59  	PCMPEQB	X7, X6
    60  	PAND	X2, X0	// AND the four masks together: a byte of X0 stays 0xff only if
    61  	PAND	X6, X4	// that byte position matched in all four 16-byte chunks
    62  	PAND	X4, X0
    63  	PMOVMSKB X0, DX	// DX = 16-bit mask built from the top bit of each byte of X0
    64  	ADDL	$64, SI	// advance both pointers and shrink the count before testing the mask
    65  	ADDL	$64, DI
    66  	SUBL	$64, BX
    67  	CMPL	DX, $0xffff
    68  	JEQ	hugeloop	// all 16 mask bits set: these 64 bytes were equal, keep going
    69  	MOVB	$0, (AX)	// mismatch somewhere in the 64 bytes: result is false
    70  	RET
    71  
    72  	// 4 bytes at a time using 32-bit register
    73  bigloop:
    74  	CMPL	BX, $4
    75  	JBE	leftover	// 4 or fewer bytes remain (can be 0 after hugeloop): finish with one load
    76  	MOVL	(SI), CX
    77  	MOVL	(DI), DX
    78  	ADDL	$4, SI
    79  	ADDL	$4, DI
    80  	SUBL	$4, BX
    81  	CMPL	CX, DX
    82  	JEQ	bigloop	// this 4-byte word matched: continue
    83  	MOVB	$0, (AX)	// words differ: result is false
    84  	RET
    85  
    86  	// remaining 0-4 bytes: compare the 4 bytes that end exactly at the end of each range. This path is only reached when the initial count was >= 4, so any bytes before SI/DI covered by the load were already compared equal.
    87  leftover:
    88  	MOVL	-4(SI)(BX*1), CX	// CX = 4 bytes at SI+BX-4
    89  	MOVL	-4(DI)(BX*1), DX	// DX = 4 bytes at DI+BX-4
    90  	CMPL	CX, DX
    91  	SETEQ	(AX)	// result = 1 if the final words match, else 0
    92  	RET
    93  
    94  small:
    95  	CMPL	BX, $0
    96  	JEQ	equal	// count == 0: ZF is set by this compare, so SETEQ at equal stores 1
    97  
    98  	LEAL	0(BX*8), CX	// CX = 8*count (count is 1..3 here)
    99  	NEGL	CX	// CX = -8*count; 32-bit shifts use only the low 5 bits of CL, i.e. 32-8*count
   100  
   101  	MOVL	SI, DX
   102  	CMPB	DX, $0xfc	// inspect only the low byte of the address
   103  	JA	si_high	// low byte is 0xfd..0xff: a 4-byte load at SI would run past this 256-byte block
   104  
   105  	// load at SI won't cross a page boundary. The wanted bytes are the low count bytes; the upper bytes are discarded later.
   106  	MOVL	(SI), SI
   107  	JMP	si_finish
   108  si_high:
   109  	// low address byte is 0xfd..0xff (ends in 111111xx). Load up to bytes we want, move to correct position.
   110  	MOVL	-4(SI)(BX*1), SI	// 4 bytes ending at the last wanted byte; wanted bytes are now the high count bytes
   111  	SHRL	CX, SI	// shift right by 32-8*count bits so the wanted bytes sit in the low count bytes
   112  si_finish:
   113  
   114  	// same for DI.
   115  	MOVL	DI, DX
   116  	CMPB	DX, $0xfc
   117  	JA	di_high
   118  	MOVL	(DI), DI
   119  	JMP	di_finish
   120  di_high:
   121  	MOVL	-4(DI)(BX*1), DI
   122  	SHRL	CX, DI
   123  di_finish:
   124  
   125  	SUBL	SI, DI	// low count bytes of the difference are zero exactly when the wanted bytes are equal
   126  	SHLL	CX, DI	// shift out the unwanted upper bytes; ZF is set iff the wanted bytes matched
   127  equal:
   128  	SETEQ	(AX)	// store ZF as the result: from SHLL above, or from CMPL BX, $0 when count == 0
   129  	RET