// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "textflag.h"

// ·IndexByte is the byte-slice entry point.
//
// Frame ($0-20): b_base+0(FP), b_len+4(FP), c+12(FP), ret+16(FP).
// Presumably backs a Go stub of the form
//	func IndexByte(b []byte, c byte) int
// declared elsewhere in the package -- confirm against the Go source.
//
// It loads the arguments into the registers indexbytebody<> expects and
// stores the 32-bit result from AX into ret.
TEXT ·IndexByte(SB),NOSPLIT,$0-20
	MOVL b_base+0(FP), SI	// SI = start of data
	MOVL b_len+4(FP), BX	// BX = number of bytes
	MOVB c+12(FP), AL	// AL = byte to look for
	CALL indexbytebody<>(SB)
	MOVL AX, ret+16(FP)
	RET

// ·IndexByteString is the string entry point.
//
// Frame ($0-20): s_base+0(FP), s_len+4(FP), c+8(FP), ret+16(FP).
// Presumably backs a Go stub of the form
//	func IndexByteString(s string, c byte) int
// declared elsewhere in the package -- confirm against the Go source.
//
// Identical to ·IndexByte except that c lives at offset 8 instead of 12.
TEXT ·IndexByteString(SB),NOSPLIT,$0-20
	MOVL s_base+0(FP), SI	// SI = start of data
	MOVL s_len+4(FP), BX	// BX = number of bytes
	MOVB c+8(FP), AL	// AL = byte to look for
	CALL indexbytebody<>(SB)
	MOVL AX, ret+16(FP)
	RET

// bytes·IndexByte defines the IndexByte symbol in package bytes with the
// same frame layout and body as ·IndexByte. It has no Go stub visible
// here, so it borrows the argument stack map of ·IndexByte via FUNCDATA.
TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
	FUNCDATA $0, ·IndexByte·args_stackmap(SB)
	MOVL b_base+0(FP), SI	// SI = start of data
	MOVL b_len+4(FP), BX	// BX = number of bytes
	MOVB c+12(FP), AL	// AL = byte to look for
	CALL indexbytebody<>(SB)
	MOVL AX, ret+16(FP)
	RET

// strings·IndexByte defines the IndexByte symbol in package strings with
// the same frame layout and body as ·IndexByteString, borrowing that
// function's argument stack map via FUNCDATA.
TEXT strings·IndexByte(SB),NOSPLIT,$0-20
	FUNCDATA $0, ·IndexByteString·args_stackmap(SB)
	MOVL s_base+0(FP), SI	// SI = start of data
	MOVL s_len+4(FP), BX	// BX = number of bytes
	MOVB c+8(FP), AL	// AL = byte to look for
	CALL indexbytebody<>(SB)
	MOVL AX, ret+16(FP)
	RET

// indexbytebody<> is the shared search routine.
//
// input:
//   SI: data
//   BX: data len
//   AL: byte sought
// output:
//   AX: offset of the first matching byte relative to SI, or -1 if none
// clobbers:
//   CX, DX, DI, R11, X0, X1, flags
//
// Strategy: buffers shorter than 16 bytes are scanned with REPN SCASB.
// Longer buffers are scanned in three phases: a byte-wise head up to the
// first 16-byte boundary, 16-byte SSE compares over the aligned middle,
// and a byte-wise tail after the last 16-byte boundary.
TEXT indexbytebody<>(SB),NOSPLIT,$0
	MOVL SI, DI		// DI is the running cursor; SI stays at the start

	CMPL BX, $16
	JLT small

	// round up to first 16-byte boundary
	TESTL $15, SI
	JZ aligned
	MOVL SI, CX
	ANDL $~15, CX
	ADDL $16, CX

	// search the beginning
	// CX = distance from SI to the next 16-byte boundary (1..15 here).
	SUBL SI, CX
	REPN; SCASB
	JZ success

// DI is 16-byte aligned; get ready to search using SSE instructions
aligned:
	// round down to last 16-byte boundary
	// R11 = (SI + BX) & ~15: the end of the region covered by the SSE loop.
	MOVL BX, R11
	ADDL SI, R11
	ANDL $~15, R11

	// shuffle X0 around so that each byte contains c
	// Only the low byte of AX (AL) is meaningful; the two byte-unpacks
	// replicate it across the low 4 bytes and PSHUFL $0 broadcasts that
	// dword to all four lanes.
	MOVD AX, X0
	PUNPCKLBW X0, X0
	PUNPCKLBW X0, X0
	PSHUFL $0, X0, X0
	JMP condition

sse:
	// move the next 16-byte chunk of the buffer into X1
	MOVO (DI), X1
	// compare bytes in X0 to X1
	PCMPEQB X0, X1
	// take the top bit of each byte in X1 and put the result in DX
	PMOVMSKB X1, DX
	TESTL DX, DX
	JNZ ssesuccess
	ADDL $16, DI

condition:
	// DI and R11 are both multiples of 16 and DI never passes R11,
	// so an equality test is enough to end the loop.
	CMPL DI, R11
	JNE sse

	// search the end
	// CX = (SI + BX) - R11: bytes left after the last 16-byte boundary.
	MOVL SI, CX
	ADDL BX, CX
	SUBL R11, CX
	// With CX == 0, REPN SCASB would run zero iterations and leave the
	// zero flag set by the SUBL above, so the JZ success below would
	// report a false match. Bail out explicitly instead.
	JZ failure
	REPN; SCASB
	JZ success

failure:
	MOVL $-1, AX
	RET

// handle for lengths < 16
small:
	// For BX == 0 the scan runs zero iterations and the flags are still
	// those of CMPL BX, $16 above (zero flag clear), so we fall through
	// to the -1 return.
	MOVL BX, CX
	REPN; SCASB
	JZ success
	MOVL $-1, AX
	RET

// we've found the chunk containing the byte
// now just figure out which specific byte it is
ssesuccess:
	// get the index of the least significant set bit
	BSFW DX, DX
	// DI still points at the start of the matching chunk:
	// result = (DI - SI) + bit index within the chunk.
	SUBL SI, DI
	ADDL DI, DX
	MOVL DX, AX
	RET

success:
	// SCASB leaves DI one past the byte it matched:
	// result = DI - SI - 1.
	SUBL SI, DI
	SUBL $1, DI
	MOVL DI, AX
	RET