github.com/icodeface/tls@v0.0.0-20230910023335-34df9250cd12/internal/bytealg/equal_386.s (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
     8  TEXT ·Equal(SB),NOSPLIT,$0-25
        	// Stores 1 in ret when a and b have the same length and the same bytes, 0 otherwise.
        	// a is read as (base, len) at 0(FP)/4(FP), b as (base, len) at 12(FP)/16(FP).
     9  	MOVL	b_len+16(FP), CX
    10  	MOVL	a_len+4(FP), BX	// BX doubles as the byte count handed to memeqbody
    11  	CMPL	CX, BX
    12  	JNE	len_mismatch
    13  	MOVL	b_base+12(FP), DI
    14  	MOVL	a_base+0(FP), SI
    15  	CMPL	DI, SI
    16  	JEQ	same_memory	// same length at the same address: no need to read the data
    17  	LEAL	ret+24(FP), AX	// memeqbody writes the answer through AX
    18  	JMP	memeqbody<>(SB)	// tail call: memeqbody stores the result and returns for us
    19  same_memory:
    20  	MOVB	$1, ret+24(FP)
    21  	RET
    22  len_mismatch:
    23  	MOVB	$0, ret+24(FP)
    24  	RET
    25  
    26  // memequal(a, b unsafe.Pointer, size uintptr) bool
        // Stores 1 in ret right away when a and b are the same pointer; otherwise
        // tail-calls memeqbody, which compares size bytes and writes ret itself.
    27  TEXT runtime·memequal(SB),NOSPLIT,$0-13
    28  	MOVL	b+4(FP), DI
    29  	MOVL	a+0(FP), SI
    30  	CMPL	DI, SI
    31  	JNE	compare_bytes
    32  	MOVB	$1, ret+12(FP)	// both pointers name the same memory
    33  	RET
    34  compare_bytes:
    35  	LEAL	ret+12(FP), AX	// AX = where memeqbody stores the result
    36  	MOVL	size+8(FP), BX	// BX = number of bytes to compare
    37  	JMP	memeqbody<>(SB)
    38  
    39  // memequal_varlen(a, b unsafe.Pointer) bool
        // The byte count is not an argument: it is read from offset 4 of the closure
        // that DX points at. Identical pointers produce 1 without reading the count.
    40  TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
    41  	MOVL	b+4(FP), DI
    42  	MOVL	a+0(FP), SI
    43  	CMPL	DI, SI
    44  	JNE	compare_bytes
    45  	MOVB	$1, ret+8(FP)	// both pointers name the same memory
    46  	RET
    47  compare_bytes:
    48  	LEAL	ret+8(FP), AX	// AX = where memeqbody stores the result
    49  	MOVL	4(DX), BX	// compiler stores size at offset 4 in the closure
    50  	JMP	memeqbody<>(SB)
    51  
    52  // memeqbody compares BX bytes at SI against BX bytes at DI and writes
    53  // 1 (all bytes equal) or 0 (some byte differs) to the byte AX points at.
    54  // In: a in SI, b in DI, count in BX, address of result byte in AX.
    55  // SI, DI, BX, CX, DX and the flags are overwritten; X0-X7 are too on the 64-byte path.
    56  TEXT memeqbody<>(SB),NOSPLIT,$0-0
    57  	CMPL	BX, $4
    58  	JB	small	// under 4 bytes: a 4-byte load needs the guarded path at small
    59  
    60  	// 64 bytes at a time using xmm registers
    61  hugeloop:
    62  	CMPL	BX, $64
    63  	JB	bigloop	// fewer than 64 bytes left
    64  	CMPB	internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
    65  	JNE	bigloop	// HasSSE2 flag byte is not 1: stay on the 32-bit path
    66  	MOVOU	(SI), X0
    67  	MOVOU	(DI), X1
    68  	MOVOU	16(SI), X2
    69  	MOVOU	16(DI), X3
    70  	MOVOU	32(SI), X4
    71  	MOVOU	32(DI), X5
    72  	MOVOU	48(SI), X6
    73  	MOVOU	48(DI), X7
    74  	PCMPEQB	X1, X0	// each byte lane becomes 0xff where a and b agree, 0x00 where they differ
    75  	PCMPEQB	X3, X2
    76  	PCMPEQB	X5, X4
    77  	PCMPEQB	X7, X6
    78  	PAND	X2, X0	// fold the four 16-byte results into X0
    79  	PAND	X6, X4
    80  	PAND	X4, X0
    81  	PMOVMSKB X0, DX	// DX = one bit per byte lane of X0
    82  	ADDL	$64, SI
    83  	ADDL	$64, DI
    84  	SUBL	$64, BX
    85  	CMPL	DX, $0xffff	// done after the pointer updates so its flags reach the JEQ; 0xffff = all 64 bytes matched
    86  	JEQ	hugeloop
    87  	MOVB	$0, (AX)
    88  	RET
    89  
    90  	// 4 bytes at a time using 32-bit register
    91  bigloop:
    92  	CMPL	BX, $4
    93  	JBE	leftover	// 4 or fewer bytes left: finish with a single load at leftover
    94  	MOVL	(SI), CX
    95  	MOVL	(DI), DX
    96  	ADDL	$4, SI
    97  	ADDL	$4, DI
    98  	SUBL	$4, BX
    99  	CMPL	CX, DX	// done after the pointer updates so its flags reach the JEQ
   100  	JEQ	bigloop
   101  	MOVB	$0, (AX)
   102  	RET
   103  
   104  	// remaining 0-4 bytes
        	// Only reached when the initial count was at least 4 (see the first check), so
        	// the 4 bytes ending at SI+BX / DI+BX lie inside the inputs; they may overlap
        	// bytes that were already compared, which does not change the answer.
   105  leftover:
   106  	MOVL	-4(SI)(BX*1), CX
   107  	MOVL	-4(DI)(BX*1), DX
   108  	CMPL	CX, DX
   109  	SETEQ	(AX)
   110  	RET
   111  
        	// count is 0-3 here: load 4 bytes per side without running past a
        	// boundary, then compare only the low BX bytes.
   112  small:
   113  	CMPL	BX, $0
   114  	JEQ	equal	// zero length: ZF from this CMPL makes the SETEQ at equal store 1
   115  
   116  	LEAL	0(BX*8), CX
   117  	NEGL	CX	// CX = -8*BX; 32-bit shifts use only the low 5 bits of the count, i.e. 32-8*BX
   118  
   119  	MOVL	SI, DX
   120  	CMPB	DX, $0xfc	// low address byte above 0xfc: a 4-byte load at SI would run past a 256-byte boundary
   121  	JA	si_high
   122  
   123  	// load at SI won't cross a page boundary.
   124  	MOVL	(SI), SI
   125  	JMP	si_finish
   126  si_high:
   127  	// address ends in 111111xx. Load up to bytes we want, move to correct position.
   128  	MOVL	-4(SI)(BX*1), SI
   129  	SHRL	CX, SI	// wanted bytes now sit in the low 8*BX bits, upper bits are zero
   130  si_finish:
   131  
   132  	// same for DI.
   133  	MOVL	DI, DX
   134  	CMPB	DX, $0xfc
   135  	JA	di_high
   136  	MOVL	(DI), DI
   137  	JMP	di_finish
   138  di_high:
   139  	MOVL	-4(DI)(BX*1), DI
   140  	SHRL	CX, DI
   141  di_finish:
   142  
   143  	SUBL	SI, DI	// DI = (word from b) - (word from a)
   144  	SHLL	CX, DI	// shift out everything above the low 8*BX bits; ZF is set iff the first BX bytes match
   145  equal:
   146  	SETEQ	(AX)	// ZF comes from the SHLL above, or from CMPL BX, $0 when the count is zero
   147  	RET