github.com/JimmyHuang454/JLS-go@v0.0.0-20230831150107-90d536585ba0/internal/bytealg/equal_386.s (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
     8  // memequal(a, b unsafe.Pointer, size uintptr) bool
        //
        // Reports whether the size bytes at a equal the size bytes at b.
        // When a and b are the same address the answer is true and is stored
        // immediately, without reading size or touching the pointed-to memory.
        // Otherwise the arguments are placed in the registers memeqbody<>
        // documents (a in SI, b in DI, count in BX, address of the result byte
        // in AX) and control jumps there; memeqbody<> writes the result through
        // AX and its RET returns straight to this function's caller.
        //
        // Argument layout used below: a at 0(FP), b at 4(FP), size at 8(FP),
        // one-byte result at 12(FP) (13 bytes total, no local frame).
     9  TEXT runtime·memequal(SB),NOSPLIT,$0-13
    10  	MOVL	a+0(FP), SI	// SI = a
    11  	MOVL	b+4(FP), DI	// DI = b
    12  	CMPL	SI, DI
    13  	JEQ	eq	// identical pointers: equal by definition
    14  	MOVL	size+8(FP), BX	// BX = number of bytes to compare
    15  	LEAL	ret+12(FP), AX	// AX = address of the result slot
    16  	JMP	memeqbody<>(SB)	// tail jump; memeqbody<> stores the result and returns
    17  eq:
    18  	MOVB    $1, ret+12(FP)	// result = true
    19  	RET
    20  
    21  // memequal_varlen(a, b unsafe.Pointer) bool
        //
        // Same comparison as memequal, but the byte count is not an argument:
        // it is read from offset 4 of the object DX points to (the closure, per
        // the comment on that load). DX must therefore still hold that pointer
        // on entry.
        // When a and b are the same address, true is stored without reading the
        // size. Otherwise a, b, the size and the address of the result byte are
        // placed in SI, DI, BX and AX and control jumps to memeqbody<>, which
        // writes the result and returns to this function's caller.
        //
        // Argument layout used below: a at 0(FP), b at 4(FP), one-byte result
        // at 8(FP) (9 bytes total, no local frame).
    22  TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
    23  	MOVL    a+0(FP), SI	// SI = a
    24  	MOVL    b+4(FP), DI	// DI = b
    25  	CMPL    SI, DI
    26  	JEQ     eq	// identical pointers: equal by definition
    27  	MOVL    4(DX), BX    // compiler stores size at offset 4 in the closure
    28  	LEAL	ret+8(FP), AX	// AX = address of the result slot
    29  	JMP	memeqbody<>(SB)	// tail jump; memeqbody<> stores the result and returns
    30  eq:
    31  	MOVB    $1, ret+8(FP)	// result = true
    32  	RET
    33  
    34  // a in SI
    35  // b in DI
    36  // count in BX
    37  // address of result byte in AX
        //
        // Shared comparison body for the memequal entry points in this file.
        // Stores 1 through AX if the count bytes at SI and at DI are all equal
        // and 0 otherwise, then returns. The entry points reach it with JMP, so
        // the RETs below return to their caller.
        //
        // Overwrites SI, DI, BX, CX, DX and the flags (plus X0-X7 on the
        // 64-byte path). AX is only used as the store address and is not
        // modified.
        //
        // Strategy by count:
        //   count < 4   small:    one 4-byte load per side, unwanted bytes
        //                         shifted away.
        //   count >= 64 hugeloop: 64 bytes per iteration with SSE registers
        //                         (never executed when GO386_softfloat is
        //                         defined).
        //   otherwise   bigloop:  4 bytes per iteration while more than 4
        //                         remain, then leftover compares the final
        //                         4 bytes of the buffers.
    38  TEXT memeqbody<>(SB),NOSPLIT,$0-0
    39  	CMPL	BX, $4
    40  	JB	small	// fewer than 4 bytes: a plain 4-byte load could overrun
    41  
    42  	// 64 bytes at a time using xmm registers
    43  hugeloop:
    44  	CMPL	BX, $64
    45  	JB	bigloop
        	// With GO386_softfloat the SSE code below is skipped unconditionally
        	// and bigloop handles every remaining byte.
    46  #ifdef GO386_softfloat
    47  	JMP	bigloop
    48  #endif
        	// Load four 16-byte chunks from each side (unaligned loads).
    49  	MOVOU	(SI), X0
    50  	MOVOU	(DI), X1
    51  	MOVOU	16(SI), X2
    52  	MOVOU	16(DI), X3
    53  	MOVOU	32(SI), X4
    54  	MOVOU	32(DI), X5
    55  	MOVOU	48(SI), X6
    56  	MOVOU	48(DI), X7
        	// Byte-wise compare: each result byte is 0xff where the inputs match
        	// and 0x00 where they differ.
    57  	PCMPEQB	X1, X0
    58  	PCMPEQB	X3, X2
    59  	PCMPEQB	X5, X4
    60  	PCMPEQB	X7, X6
        	// AND the four masks together: a byte of X0 stays 0xff only if that
        	// byte position matched in all four chunks.
    61  	PAND	X2, X0
    62  	PAND	X6, X4
    63  	PAND	X4, X0
    64  	PMOVMSKB X0, DX	// DX = top bit of each of the 16 mask bytes
        	// Advance before testing DX; the CMPL below is the last flag writer
        	// before the branch, so these updates do not disturb the decision.
    65  	ADDL	$64, SI
    66  	ADDL	$64, DI
    67  	SUBL	$64, BX
    68  	CMPL	DX, $0xffff	// all 16 bits set <=> all 64 bytes matched
    69  	JEQ	hugeloop
    70  	MOVB	$0, (AX)	// mismatch: result = false
    71  	RET
    72  
    73  	// 4 bytes at a time using 32-bit register
    74  bigloop:
    75  	CMPL	BX, $4
    76  	JBE	leftover	// 4 or fewer bytes remain: finish with one overlapping load
    77  	MOVL	(SI), CX
    78  	MOVL	(DI), DX
    79  	ADDL	$4, SI
    80  	ADDL	$4, DI
    81  	SUBL	$4, BX
    82  	CMPL	CX, DX	// last flag writer before the branch
    83  	JEQ	bigloop
    84  	MOVB	$0, (AX)	// mismatch: result = false
    85  	RET
    86  
    87  	// remaining 0-4 bytes
        	// Compare the 4 bytes that end exactly at the end of each buffer
        	// (address SI+BX-4). This path is only reached when the original count
        	// was at least 4, so the load starts inside the buffer; any bytes it
        	// re-reads were already found equal, so they cannot change the result.
    88  leftover:
    89  	MOVL	-4(SI)(BX*1), CX
    90  	MOVL	-4(DI)(BX*1), DX
    91  	CMPL	CX, DX
    92  	SETEQ	(AX)	// result = (last 4 bytes equal)
    93  	RET
    94  
        	// count is 0..3 here.
    95  small:
    96  	CMPL	BX, $0
    97  	JEQ	equal	// zero bytes: ZF is set by the CMPL, so SETEQ at equal stores 1
    98  
        	// CX = -8*count. The shifts below take their count from CX, and x86
        	// uses only the low 5 bits of a 32-bit shift count, so the effective
        	// shift is 32 - 8*count bits (24, 16 or 8).
    99  	LEAL	0(BX*8), CX
   100  	NEGL	CX
   101  
        	// Test the low byte of the address: if it is at most 0xfc, the 4 bytes
        	// starting at SI do not run past the next multiple of 256.
   102  	MOVL	SI, DX
   103  	CMPB	DX, $0xfc
   104  	JA	si_high
   105  
   106  	// load at SI won't cross a page boundary.
        	// The bytes above the low `count` bytes are not part of the buffer;
        	// they are discarded by the final SHLL.
   107  	MOVL	(SI), SI
   108  	JMP	si_finish
   109  si_high:
   110  	// address ends in 111111xx. Load up to bytes we want, move to correct position.
        	// The load ends at the buffer's last byte, so the wanted bytes arrive
        	// in the high-order end of the register; the shift moves them down to
        	// the low-order end and zeroes the rest.
   111  	MOVL	-4(SI)(BX*1), SI
   112  	SHRL	CX, SI
   113  si_finish:
   114  
   115  	// same for DI.
   116  	MOVL	DI, DX
   117  	CMPB	DX, $0xfc
   118  	JA	di_high
   119  	MOVL	(DI), DI
   120  	JMP	di_finish
   121  di_high:
   122  	MOVL	-4(DI)(BX*1), DI
   123  	SHRL	CX, DI
   124  di_finish:
   125  
        	// The low `count` bytes of SI and DI now hold the data. Their
        	// difference has zero low `count` bytes exactly when the data match;
        	// shifting left by 32 - 8*count drops the unrelated high bytes and
        	// sets ZF from what remains.
   126  	SUBL	SI, DI
   127  	SHLL	CX, DI
        	// Reached with ZF from the SHLL above, or from the count == 0 test.
   128  equal:
   129  	SETEQ	(AX)	// result = ZF
   130  	RET