github.com/icodeface/tls@v0.0.0-20230910023335-34df9250cd12/internal/bytealg/equal_amd64p32.s (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
     8  TEXT ·Equal(SB),NOSPLIT,$0-25	// ret = 1 when a and b have the same length and the same bytes, else 0
     9  	MOVL	b_len+16(FP), CX	// CX = length of b
    10  	MOVL	a_len+4(FP), BX	// BX = length of a; doubles as the byte count for memeqbody
    11  	CMPL	BX, CX
    12  	JNE	mismatch	// different lengths can never be equal
    13  	MOVL	b_base+12(FP), DI	// DI = start of b
    14  	MOVL	a_base+0(FP), SI	// SI = start of a
    15  	CMPL	SI, DI
    16  	JNE	compare	// distinct pointers: the contents must be inspected
    17  	MOVB	$1, ret+24(FP)	// same pointer, same length: equal without reading memory
    18  	RET
    19  compare:
    20  	CALL	memeqbody<>(SB)	// takes a in SI, b in DI, count in BX; result in AX
    21  	MOVB	AX, ret+24(FP)
    22  	RET
    23  mismatch:
    24  	MOVB	$0, ret+24(FP)
    25  	RET
    26  
    27  // memequal(a, b unsafe.Pointer, size uintptr) bool -- ret = 1 when the size bytes at a and b match
    28  TEXT runtime·memequal(SB),NOSPLIT,$0-17
    29  	MOVL	b+4(FP), DI	// DI = b
    30  	MOVL	a+0(FP), SI	// SI = a
    31  	CMPL	SI, DI
    32  	JNE	compare	// distinct pointers: the contents must be inspected
    33  	MOVB	$1, ret+16(FP)	// identical pointers: equal without reading memory
    34  	RET
    35  compare:
    36  	MOVL	size+8(FP), BX	// BX = number of bytes to compare
    37  	CALL	memeqbody<>(SB)	// takes a in SI, b in DI, count in BX; result in AX
    38  	MOVB	AX, ret+16(FP)
    39  	RET
    40  
    41  // memequal_varlen(a, b unsafe.Pointer) bool -- like memequal, but the size is read through DX
    42  TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
    43  	MOVL    b+4(FP), DI    // DI = b
    44  	MOVL    a+0(FP), SI    // SI = a
    45  	CMPL    SI, DI
    46  	JNE     compare        // distinct pointers: the contents must be inspected
    47  	MOVB    $1, ret+8(FP)  // identical pointers: equal without reading memory
    48  	RET
    49  compare:
    50  	MOVL    4(DX), BX      // compiler stores size at offset 4 in the closure
    51  	CALL    memeqbody<>(SB)    // takes a in SI, b in DI, count in BX; result in AX
    52  	MOVB    AX, ret+8(FP)
    53  	RET
    54  
    55  // memeqbody compares two byte ranges of equal length. Inputs: a in SI,
    56  // b in DI, count in BX. Result: AX = 1 if every byte matches, otherwise 0.
    57  // SI, DI, BX, CX, DX and X0-X7 are used as scratch and are not preserved.
    58  TEXT memeqbody<>(SB),NOSPLIT,$0-0
    59  	XORQ	AX, AX	// AX = 0: the early RETs below report "not equal" with this value
    60  
    61  	CMPQ	BX, $8
    62  	JB	small	// fewer than 8 bytes: handled without a loop
    63  
    64  	// 64 bytes at a time using xmm registers
    65  hugeloop:
    66  	CMPQ	BX, $64
    67  	JB	bigloop	// fewer than 64 bytes left
    68  	MOVOU	(SI), X0	// even registers hold 16-byte chunks of a,
    69  	MOVOU	(DI), X1	// odd registers the matching chunks of b
    70  	MOVOU	16(SI), X2
    71  	MOVOU	16(DI), X3
    72  	MOVOU	32(SI), X4
    73  	MOVOU	32(DI), X5
    74  	MOVOU	48(SI), X6
    75  	MOVOU	48(DI), X7
    76  	PCMPEQB	X1, X0	// each byte lane becomes 0xff where a and b agree, 0x00 otherwise
    77  	PCMPEQB	X3, X2
    78  	PCMPEQB	X5, X4
    79  	PCMPEQB	X7, X6
    80  	PAND	X2, X0	// AND the four lane masks together into X0
    81  	PAND	X6, X4
    82  	PAND	X4, X0
    83  	PMOVMSKB X0, DX	// DX = one bit per lane; 0xffff means all 64 bytes matched
    84  	ADDQ	$64, SI	// advance both pointers and shrink the remaining count
    85  	ADDQ	$64, DI
    86  	SUBQ	$64, BX
    87  	CMPL	DX, $0xffff
    88  	JEQ	hugeloop
    89  	RET	// mismatch in this 64-byte block; AX is still 0
    90  
    91  	// 8 bytes at a time using 64-bit register
    92  bigloop:
    93  	CMPQ	BX, $8
    94  	JBE	leftover	// 8 or fewer bytes left: finish with a single load
    95  	MOVQ	(SI), CX
    96  	MOVQ	(DI), DX
    97  	ADDQ	$8, SI
    98  	ADDQ	$8, DI
    99  	SUBQ	$8, BX
   100  	CMPQ	CX, DX
   101  	JEQ	bigloop
   102  	RET	// mismatch in this 8-byte word; AX is still 0
   103  
   104  	// remaining 0-8 bytes
   105  leftover:
   106  	ADDQ	BX, SI	// move SI and DI to one past the last byte
   107  	ADDQ	BX, DI
   108  	MOVQ	-8(SI), CX	// final 8 bytes of each range; this may re-read bytes already
   109  	MOVQ	-8(DI), DX	// compared, which is safe because the total count was at least 8
   110  	CMPQ	CX, DX
   111  	SETEQ	AX
   112  	RET
   113  
   114  small:
   115  	CMPQ	BX, $0
   116  	JEQ	equal	// zero bytes: ZF is set here, so SETEQ at equal stores 1
   117  
   118  	LEAQ	0(BX*8), CX	// CX = 8*count, the number of data bits
   119  	NEGQ	CX	// CX = -8*count; as a 64-bit shift count this acts as 64 - 8*count
   120  
   121  	CMPB	SI, $0xf8	// inspect the low byte of the address
   122  	JA	si_high	// low byte above 0xf8: an 8-byte load from SI would cross a 256-byte boundary
   123  
   124  	// load at SI won't cross a page boundary.
   125  	MOVQ	(SI), SI	// only the low count bytes are data; the rest is discarded by SHLQ below
   126  	JMP	si_finish
   127  si_high:
   128  	// address ends in 11111xxx. Load up to bytes we want, move to correct position.
   129  	MOVQ	BX, DX
   130  	ADDQ	SI, DX	// DX = one past the last byte of a
   131  	MOVQ	-8(DX), SI	// 8 bytes ending exactly at the end of a
   132  	SHRQ	CX, SI	// shift the wanted bytes down to the low end of SI
   133  si_finish:
   134  
   135  	// same for DI.
   136  	CMPB	DI, $0xf8
   137  	JA	di_high
   138  	MOVQ	(DI), DI
   139  	JMP	di_finish
   140  di_high:
   141  	MOVQ	BX, DX
   142  	ADDQ	DI, DX	// DX = one past the last byte of b
   143  	MOVQ	-8(DX), DI	// 8 bytes ending exactly at the end of b
   144  	SHRQ	CX, DI	// shift the wanted bytes down to the low end of DI
   145  di_finish:
   146  
   147  	SUBQ	SI, DI	// the low count bytes of the difference are zero iff the data bytes agree
   148  	SHLQ	CX, DI	// shift out the bytes above count; ZF is set iff what remains is zero
   149  equal:
   150  	SETEQ	AX	// AX = 1 if ZF is set (equal, or zero length), else 0
   151  	RET