// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Listing origin:
// github.com/hikaru7719/go@v0.0.0-20181025140707-c8b2ac68906a/src/internal/bytealg/compare_amd64p32.s

#include "go_asm.h"
#include "textflag.h"

// ·Compare reads a_base/a_len and b_base/b_len from its argument frame,
// hands them to cmpbody<> in SI/BX and DI/DX, and stores the value that
// cmpbody<> leaves in AX into ret+24(FP).
// Frame: no locals, 28 bytes of arguments and result.
TEXT ·Compare(SB),NOSPLIT,$0-28
	MOVL	a_base+0(FP), SI
	MOVL	a_len+4(FP), BX
	MOVL	b_base+12(FP), DI
	MOVL	b_len+16(FP), DX
	CALL	cmpbody<>(SB)
	MOVL	AX, ret+24(FP)
	RET

// bytes·Compare has the same frame layout and body as ·Compare.
// It declares no argument stack map of its own and instead points its
// FUNCDATA $0 entry at the one belonging to ·Compare.
TEXT bytes·Compare(SB),NOSPLIT,$0-28
	FUNCDATA $0, ·Compare·args_stackmap(SB)
	MOVL	a_base+0(FP), SI
	MOVL	a_len+4(FP), BX
	MOVL	b_base+12(FP), DI
	MOVL	b_len+16(FP), DX
	CALL	cmpbody<>(SB)
	MOVL	AX, ret+24(FP)
	RET

// runtime·cmpstring takes two (base, len) pairs packed back to back
// (offsets 0/4 and 8/12) and stores the result of cmpbody<> at ret+16(FP).
// Frame: no locals, 20 bytes of arguments and result.
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
	MOVL	a_base+0(FP), SI
	MOVL	a_len+4(FP), BX
	MOVL	b_base+8(FP), DI
	MOVL	b_len+12(FP), DX
	CALL	cmpbody<>(SB)
	MOVL	AX, ret+16(FP)
	RET

// cmpbody<> is the shared comparison kernel used by the entry points above.
//
// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
// output:
//   AX = 1/0/-1
// clobbers:
//   BX, CX, SI, DI, R8, X0, X1, flags
//
// The first min(alen, blen) bytes are compared; the first differing byte
// decides the result (unsigned byte order). If that common prefix is
// identical, or both pointers are equal, the lengths decide (allsame).
TEXT cmpbody<>(SB),NOSPLIT,$0-0
	CMPQ	SI, DI
	JEQ	allsame		// same start address: only the lengths can differ
	CMPQ	BX, DX
	MOVQ	DX, R8
	CMOVQLT	BX, R8		// R8 = min(alen, blen) = # of bytes to compare
	CMPQ	R8, $8
	JB	small

	// Compare 16 bytes per iteration while more than 16 bytes remain.
loop:
	CMPQ	R8, $16
	JBE	_0through16
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB	X1, AX
	XORQ	$0xffff, AX	// convert EQ to NE
	JNE	diff16		// branch if at least one byte is not equal
	ADDQ	$16, SI
	ADDQ	$16, DI
	SUBQ	$16, R8
	JMP	loop

	// AX = bit mask of differences
diff16:
	BSFQ	AX, BX		// index of first byte that differs
	XORQ	AX, AX
	ADDQ	BX, SI
	MOVB	(SI), CX
	ADDQ	BX, DI
	CMPB	CX, (DI)
	SETHI	AX		// AX = 1 if a's byte is above b's byte (unsigned), else 0
	LEAQ	-1(AX*2), AX	// convert 1/0 to +1/-1
	RET

	// 0 through 16 bytes left, alen>=8, blen>=8
_0through16:
	CMPQ	R8, $8
	JBE	_0through8
	MOVQ	(SI), AX
	MOVQ	(DI), CX
	CMPQ	AX, CX
	JNE	diff8
_0through8:
	// Compare the last 8 bytes of the remaining range; this load may
	// overlap bytes that were already found equal.
	ADDQ	R8, SI
	ADDQ	R8, DI
	MOVQ	-8(SI), AX
	MOVQ	-8(DI), CX
	CMPQ	AX, CX
	JEQ	allsame

	// AX and CX contain parts of a and b that differ.
diff8:
	BSWAPQ	AX		// reverse order of bytes
	BSWAPQ	CX
	XORQ	AX, CX
	BSRQ	CX, CX		// index of highest bit difference
	SHRQ	CX, AX		// move a's bit to bottom
	ANDQ	$1, AX		// mask bit
	LEAQ	-1(AX*2), AX	// 1/0 => +1/-1
	RET

	// 0-7 bytes in common
small:
	LEAQ	(R8*8), CX	// bytes left -> bits left
	NEGQ	CX		// -(bits left) (== 64 - bits left, mod 64)
	JEQ	allsame		// nothing to compare: the lengths decide

	// load bytes of a into high bytes of SI
	// If the low address byte is above 0xf8, load the 8 bytes that end at
	// a+R8 instead of the 8 bytes that start at a (presumably so the load
	// cannot run past the end of the page holding a), then shift the
	// wanted bytes down so both paths agree before the common SHLQ.
	CMPB	SI, $0xf8
	JA	si_high
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	ADDQ	R8, SI
	MOVQ	-8(SI), SI
	SHRQ	CX, SI
si_finish:
	SHLQ	CX, SI

	// load bytes of b into high bytes of DI (same scheme as for a)
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	ADDQ	R8, DI
	MOVQ	-8(DI), DI
	SHRQ	CX, DI
di_finish:
	SHLQ	CX, DI

	BSWAPQ	SI		// reverse order of bytes
	BSWAPQ	DI
	XORQ	SI, DI		// find bit differences
	JEQ	allsame
	BSRQ	DI, CX		// index of highest bit difference
	SHRQ	CX, SI		// move a's bit to bottom
	ANDQ	$1, SI		// mask bit
	LEAQ	-1(SI*2), AX	// 1/0 => +1/-1
	RET

	// All compared bytes are equal; order by length.
allsame:
	XORQ	AX, AX
	XORQ	CX, CX
	CMPQ	BX, DX
	SETGT	AX		// 1 if alen > blen
	SETEQ	CX		// 1 if alen == blen
	LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
	RET