github.com/FISCO-BCOS/crypto@v0.0.0-20200202032121-bd8ab0b5d4f1/internal/bytealg/equal_amd64p32.s (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
     8  // memequal(a, b unsafe.Pointer, size uintptr) bool
        //
        // Reports whether the size bytes starting at a and at b are identical.
        // The pointer and size arguments are read as 32-bit values with MOVL.
        // If a and b are the same address the answer is written as 1 without
        // touching memory; otherwise the comparison is delegated to memeqbody<>
        // (a in SI, b in DI, count in BX) and the low byte of AX is stored as
        // the result.
     9  TEXT runtime·memequal(SB),NOSPLIT,$0-17
    10  	MOVL	a+0(FP), SI		// SI = a
    11  	MOVL	b+4(FP), DI		// DI = b
    12  	CMPL	SI, DI
    13  	JEQ	same_ptr		// a == b: trivially equal
    14  	MOVL	size+8(FP), BX		// BX = number of bytes to compare
    15  	CALL	memeqbody<>(SB)
    16  	MOVB	AX, ret+16(FP)		// result byte produced by memeqbody<>
    17  	RET
    18  same_ptr:
    19  	MOVB	$1, ret+16(FP)		// same address: report true
    20  	RET
    21  
    22  // memequal_varlen(a, b unsafe.Pointer) bool
        //
        // Reports whether two memory regions are identical when the byte count
        // is not passed as an argument: it is read from offset 4 of the closure
        // addressed by DX. Identical pointers are answered with 1 immediately;
        // otherwise memeqbody<> (a in SI, b in DI, count in BX) does the
        // comparison and the low byte of AX is stored as the result.
    23  TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
    24  	MOVL	a+0(FP), SI		// SI = a
    25  	MOVL	b+4(FP), DI		// DI = b
    26  	CMPL	SI, DI
    27  	JEQ	same_ptr		// a == b: trivially equal
    28  	MOVL	4(DX), BX		// compiler stores size at offset 4 in the closure
    29  	CALL	memeqbody<>(SB)
    30  	MOVB	AX, ret+8(FP)		// result byte produced by memeqbody<>
    31  	RET
    32  same_ptr:
    33  	MOVB	$1, ret+8(FP)		// same address: report true
    34  	RET
    35  
    36  // a in SI
    37  // b in DI
    38  // count in BX
        //
        // memeqbody compares count bytes at a and b and leaves the answer in AX:
        // 1 if every byte matches, 0 otherwise. Inputs arrive in registers, not on
        // the stack. SI, DI, BX, CX, DX, X0-X7 and the flags are overwritten.
        //
        // Strategy:
        //   count >= 64  compare 64-byte blocks with SSE (hugeloop)
        //   count >  8   compare 8-byte words (bigloop)
        //   tail         one 8-byte load that ends exactly at the end of the data
        //                and may overlap bytes already compared (leftover)
        //   count <  8   single masked 8-byte compare that avoids reading across
        //                a page boundary (small)
    39  TEXT memeqbody<>(SB),NOSPLIT,$0-0
    40  	XORQ	AX, AX			// default result 0 (not equal); the early RETs below rely on it
    41  
    42  	CMPQ	BX, $8
    43  	JB	small			// fewer than 8 bytes: take the masked-load path
    44  
    45  	// 64 bytes at a time using xmm registers
    46  hugeloop:
    47  	CMPQ	BX, $64
    48  	JB	bigloop			// fewer than 64 bytes left
    49  	MOVOU	(SI), X0
    50  	MOVOU	(DI), X1
    51  	MOVOU	16(SI), X2
    52  	MOVOU	16(DI), X3
    53  	MOVOU	32(SI), X4
    54  	MOVOU	32(DI), X5
    55  	MOVOU	48(SI), X6
    56  	MOVOU	48(DI), X7
    57  	PCMPEQB	X1, X0			// per byte: 0xff where equal, 0x00 where different
    58  	PCMPEQB	X3, X2
    59  	PCMPEQB	X5, X4
    60  	PCMPEQB	X7, X6
    61  	PAND	X2, X0			// fold the four 16-byte results into X0
    62  	PAND	X6, X4
    63  	PAND	X4, X0
    64  	PMOVMSKB X0, DX			// DX = one bit per byte of X0
    65  	ADDQ	$64, SI			// advance both pointers, shrink the remaining count
    66  	ADDQ	$64, DI
    67  	SUBQ	$64, BX
    68  	CMPL	DX, $0xffff		// all 16 bits set <=> all 64 bytes matched
    69  	JEQ	hugeloop
    70  	RET				// mismatch: AX is still 0
    71  
    72  	// 8 bytes at a time using 64-bit register
    73  bigloop:
    74  	CMPQ	BX, $8
    75  	JBE	leftover		// 8 or fewer bytes left: finish with the tail load
    76  	MOVQ	(SI), CX
    77  	MOVQ	(DI), DX
    78  	ADDQ	$8, SI
    79  	ADDQ	$8, DI
    80  	SUBQ	$8, BX
    81  	CMPQ	CX, DX
    82  	JEQ	bigloop
    83  	RET				// mismatch: AX is still 0
    84  
    85  	// remaining 0-8 bytes
        	// Only reached when the original count was >= 8, so the 8 bytes that end
        	// at the end of the data all lie inside the buffers; some of them may
        	// already have been compared above.
    86  leftover:
    87  	ADDQ	BX, SI			// SI = one past the last byte of a
    88  	ADDQ	BX, DI			// DI = one past the last byte of b
    89  	MOVQ	-8(SI), CX		// final 8 bytes of a
    90  	MOVQ	-8(DI), DX		// final 8 bytes of b
    91  	CMPQ	CX, DX
    92  	SETEQ	AX			// AX = 1 iff the final words match
    93  	RET
    94  
    95  small:
    96  	CMPQ	BX, $0
    97  	JEQ	equal			// zero length: ZF is set, so SETEQ at "equal" yields 1
    98  
    99  	LEAQ	0(BX*8), CX		// CX = 8*count (bits of real data)
   100  	NEGQ	CX			// CX = -8*count; 64-bit shifts use the count mod 64, i.e. 64-8*count
   101  
   102  	CMPB	SI, $0xf8		// low address byte above 0xf8: an 8-byte load here may cross a page
   103  	JA	si_high
   104  
   105  	// load at SI won't cross a page boundary.
   106  	MOVQ	(SI), SI		// SI = 8 bytes at a; only the low count bytes are meaningful
   107  	JMP	si_finish
   108  si_high:
   109  	// address ends in 11111xxx. Load up to bytes we want, move to correct position.
   110  	MOVQ	BX, DX
   111  	ADDQ	SI, DX			// DX = a + count
   112  	MOVQ	-8(DX), SI		// 8 bytes ending at the end of the data; wanted bytes are on top
   113  	SHRQ	CX, SI			// shift the wanted bytes down to the low end
   114  si_finish:
   115  
   116  	// same for DI.
   117  	CMPB	DI, $0xf8
   118  	JA	di_high
   119  	MOVQ	(DI), DI		// DI = 8 bytes at b; only the low count bytes are meaningful
   120  	JMP	di_finish
   121  di_high:
   122  	MOVQ	BX, DX
   123  	ADDQ	DI, DX			// DX = b + count
   124  	MOVQ	-8(DX), DI
   125  	SHRQ	CX, DI
   126  di_finish:
   127  
   128  	SUBQ	SI, DI			// low count bytes of the difference are zero iff the data matches
   129  	SHLQ	CX, DI			// discard the unwanted high bytes; ZF set iff what remains is zero
   130  equal:
   131  	SETEQ	AX			// AX = 1 iff ZF is set (from SHLQ above, or from CMPQ BX, $0)
   132  	RET