// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "textflag.h"

// ·Compare loads its two (base, len) argument pairs into the registers
// cmpbody expects, calls cmpbody, and stores the result from AX in ret.
//
// Arguments read ($0-28 frame): a_base+0, a_len+4, b_base+12, b_len+16;
// the result is written to ret+24. Offsets 8 and 20 are never touched
// here (presumably the capacity words of two slices -- confirm against
// the Go declaration).
//
// The loads are 32-bit (MOVL); a 32-bit register write clears the upper
// half of the register, so the 64-bit instructions in cmpbody operate on
// zero-extended pointers and lengths.
TEXT ·Compare(SB),NOSPLIT,$0-28
	MOVL	a_base+0(FP), SI
	MOVL	a_len+4(FP), BX
	MOVL	b_base+12(FP), DI
	MOVL	b_len+16(FP), DX
	CALL	cmpbody<>(SB)
	MOVL	AX, ret+24(FP)
	RET

// runtime·cmpstring is the same wrapper for a more compact argument
// layout ($0-20 frame): a_base+0, a_len+4, b_base+8, b_len+12, with the
// result written to ret+16 (presumably two string headers -- confirm
// against the Go declaration).
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
	MOVL	a_base+0(FP), SI
	MOVL	a_len+4(FP), BX
	MOVL	b_base+8(FP), DI
	MOVL	b_len+12(FP), DX
	CALL	cmpbody<>(SB)
	MOVL	AX, ret+16(FP)
	RET

// cmpbody compares the byte sequences a and b lexicographically.
//
// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
// output:
//   AX = 1/0/-1
// modifies:
//   AX, BX (diff16 path only), CX, SI, DI, R8, X0, X1, flags
//
// Only the first min(alen, blen) bytes are compared. If they are all
// equal (or a and b are the same pointer), the lengths decide the result.
// Strategy by number of bytes left to compare (R8):
//   > 16   16-byte SSE compare per iteration (loop)
//   8..16  one or two possibly overlapping 8-byte compares
//   < 8    a single masked 8-byte load per operand (small)
TEXT cmpbody<>(SB),NOSPLIT,$0-0
	CMPQ	SI, DI
	JEQ	allsame			// same pointer: only the lengths matter
	CMPQ	BX, DX
	MOVQ	DX, R8
	CMOVQLT	BX, R8			// R8 = min(alen, blen) = # of bytes to compare
	CMPQ	R8, $8
	JB	small

loop:
	CMPQ	R8, $16
	JBE	_0through16
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB	X0, X1			// X1 = per-byte equality mask
	PMOVMSKB X1, AX			// AX = one bit per byte, set where equal
	XORQ	$0xffff, AX		// convert EQ to NE
	JNE	diff16			// branch if at least one byte is not equal
	ADDQ	$16, SI
	ADDQ	$16, DI
	SUBQ	$16, R8
	JMP	loop

	// AX = bit mask of differences
diff16:
	BSFQ	AX, BX			// index of first byte that differs
	XORQ	AX, AX
	ADDQ	BX, SI
	MOVB	(SI), CX
	ADDQ	BX, DI
	CMPB	CX, (DI)
	SETHI	AX			// AX = 1 if a's byte is above b's byte (unsigned)
	LEAQ	-1(AX*2), AX		// convert 1/0 to +1/-1
	RET

	// 0 through 16 bytes left, alen>=8, blen>=8
_0through16:
	CMPQ	R8, $8
	JBE	_0through8
	MOVQ	(SI), AX
	MOVQ	(DI), CX
	CMPQ	AX, CX
	JNE	diff8
_0through8:
	// Compare the final 8 bytes of the common prefix. This load may
	// overlap bytes that were already compared and found equal.
	ADDQ	R8, SI
	ADDQ	R8, DI
	MOVQ	-8(SI), AX
	MOVQ	-8(DI), CX
	CMPQ	AX, CX
	JEQ	allsame

	// AX and CX contain parts of a and b that differ.
	// After the byte swap the byte at the lowest address is the most
	// significant one, so the highest differing bit lies in the first
	// differing byte; a's bit at that position decides the order.
diff8:
	BSWAPQ	AX			// reverse order of bytes
	BSWAPQ	CX
	XORQ	AX, CX
	BSRQ	CX, CX			// index of highest bit difference
	SHRQ	CX, AX			// move a's bit to bottom
	ANDQ	$1, AX			// mask bit
	LEAQ	-1(AX*2), AX		// 1/0 => +1/-1
	RET

	// 0-7 bytes in common
small:
	LEAQ	(R8*8), CX		// bytes left -> bits left
	NEGQ	CX			// - bits left (== 64 - bits left mod 64)
	JEQ	allsame			// nothing to compare

	// Load bytes of a into the high bytes of SI.
	// If the low byte of the address is above 0xf8, an 8-byte load
	// starting at a would run past the next 256-byte boundary, so the
	// 8 bytes ending at a+len are loaded instead (presumably to avoid
	// reading into a following unmapped page).
	CMPB	SI, $0xf8
	JA	si_high
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	ADDQ	R8, SI
	MOVQ	-8(SI), SI
	SHRQ	CX, SI			// wanted bytes are on top; bring them to the bottom
si_finish:
	SHLQ	CX, SI			// wanted bytes to the top, unwanted bytes shifted out

	// Load bytes of b into the high bytes of DI (same scheme as for a).
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	ADDQ	R8, DI
	MOVQ	-8(DI), DI
	SHRQ	CX, DI
di_finish:
	SHLQ	CX, DI

	BSWAPQ	SI			// reverse order of bytes
	BSWAPQ	DI
	XORQ	SI, DI			// find bit differences
	JEQ	allsame
	BSRQ	DI, CX			// index of highest bit difference
	SHRQ	CX, SI			// move a's bit to bottom
	ANDQ	$1, SI			// mask bit
	LEAQ	-1(SI*2), AX		// 1/0 => +1/-1
	RET

	// All compared bytes are equal: the result depends on the lengths only.
allsame:
	XORQ	AX, AX
	XORQ	CX, CX
	CMPQ	BX, DX
	SETGT	AX			// 1 if alen > blen
	SETEQ	CX			// 1 if alen == blen
	LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
	RET