// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "textflag.h"

// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Reports whether the size bytes at a and at b are identical.
// Arguments are read from the frame at a+0, b+4 and size+8; the
// one-byte result is written at ret+16.
// If a and b are the same address the answer is true without touching
// memory; otherwise the comparison is delegated to memeqbody.
TEXT runtime·memequal(SB),NOSPLIT,$0-17
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI			// identical pointers => trivially equal
	JEQ	eq
	MOVL	size+8(FP), BX
	CALL	memeqbody<>(SB)		// result comes back in AX
	MOVB	AX, ret+16(FP)
	RET
eq:
	MOVB	$1, ret+16(FP)
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
//
// Same as memequal, except that the byte count is not an argument:
// it is loaded from offset 4 of the closure addressed by DX.
// The one-byte result is written at ret+8.
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI			// identical pointers => trivially equal
	JEQ	eq
	MOVL	4(DX), BX		// compiler stores size at offset 4 in the closure
	CALL	memeqbody<>(SB)		// result comes back in AX
	MOVB	AX, ret+8(FP)
	RET
eq:
	MOVB	$1, ret+8(FP)
	RET

// memeqbody is the shared comparison core.
//
// Inputs:
//	a in SI
//	b in DI
//	count in BX
// Output:
//	AX = 1 if the two ranges are equal, 0 otherwise.
// Overwrites SI, DI, BX, CX, DX and X0-X7.
//
// Strategy, chosen by count:
//	count >= 64: 64-byte chunks with SSE compares (hugeloop)
//	count >= 8:  8-byte chunks (bigloop), then one overlapping
//	             8-byte compare of the tail (leftover)
//	count <  8:  a single 8-byte load per side, arranged so that the
//	             load does not cross a page boundary (small)
TEXT memeqbody<>(SB),NOSPLIT,$0-0
	// Default result is "not equal", so every early RET on a
	// mismatch below returns 0 without further work.
	XORQ	AX, AX

	CMPQ	BX, $8
	JB	small

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPQ	BX, $64
	JB	bigloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	// Byte-wise equality of each 16-byte pair: a lane becomes 0xff
	// where the bytes match and 0x00 where they differ.
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	// AND the four results together so a single mismatching byte
	// anywhere in the 64 clears its lane in X0.
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	// Collapse the 16 lanes into a 16-bit mask; 0xffff means every
	// lane (and therefore all 64 bytes) matched.
	PMOVMSKB X0, DX
	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, BX
	CMPL	DX, $0xffff
	JEQ	hugeloop
	RET				// mismatch: AX is still 0

	// 8 bytes at a time using 64-bit register
bigloop:
	CMPQ	BX, $8
	JBE	leftover
	MOVQ	(SI), CX
	MOVQ	(DI), DX
	ADDQ	$8, SI
	ADDQ	$8, DI
	SUBQ	$8, BX
	CMPQ	CX, DX
	JEQ	bigloop
	RET				// mismatch: AX is still 0

	// remaining 0-8 bytes
	// This path is only reached when the original count was at least 8,
	// so the final 8 bytes of each buffer can be compared directly; the
	// load may overlap bytes that were already compared above.
leftover:
	ADDQ	BX, SI
	ADDQ	BX, DI
	MOVQ	-8(SI), CX
	MOVQ	-8(DI), DX
	CMPQ	CX, DX
	SETEQ	AX
	RET

	// count is 0-7 here.
small:
	CMPQ	BX, $0
	JEQ	equal			// nothing to compare; ZF is set, so equal stores 1

	// CX = -(8*count). 64-bit shifts use only the low 6 bits of CL,
	// so shifting by CX shifts by 64 - 8*count bits.
	LEAQ	0(BX*8), CX
	NEGQ	CX

	// Look at the low byte of the address to decide how to load.
	CMPB	SI, $0xf8
	JA	si_high

	// load at SI won't cross a page boundary.
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	// address ends in 11111xxx. Load up to bytes we want, move to correct position.
	MOVQ	BX, DX
	ADDQ	SI, DX
	MOVQ	-8(DX), SI		// 8 bytes ending at a+count
	SHRQ	CX, SI			// bring the count wanted bytes down to the low end
si_finish:

	// same for DI.
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	MOVQ	BX, DX
	ADDQ	DI, DX
	MOVQ	-8(DX), DI		// 8 bytes ending at b+count
	SHRQ	CX, DI			// bring the count wanted bytes down to the low end
di_finish:

	// The wanted bytes now sit in the low count bytes of SI and DI.
	// Subtract, then shift the bytes above them out of the register:
	// the result is zero (ZF set) exactly when the low count bytes match.
	SUBQ	SI, DI
	SHLQ	CX, DI
equal:
	SETEQ	AX
	RET