github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/pkg/bytes/asm_amd64.s (about)

     1  // Copyright 2009 The Go Authors.  All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // func IndexByte(s []byte, c byte) int
     6  //
     7  // IndexByte stores in ret the index of the first byte in s equal to c,
     8  // or -1 if no byte of s matches.
     9  //
    10  // Frame offsets used below:
    11  //	s+0(FP)      pointer to the first byte of s
    12  //	s_len+8(FP)  length of s in bytes
    13  //	c+24(FP)     byte to search for
    14  //	ret+32(FP)   result
    15  //
    16  // Register roles:
    17  //	SI   start of s (never modified)
    18  //	BX   length of s
    19  //	AL   c (the rest of AX is not initialized)
    20  //	DI   scan cursor
    21  //	CX   byte count for REPN; SCASB
    22  //	R11  end of s rounded down to a 16-byte boundary
    23  //	X0   c replicated into all 16 bytes
    24  //	X1   current 16-byte chunk, then the per-byte compare result
    25  //	DX   per-byte match mask, then bit index of the first match
    26  //
    27  // Strategy: slices shorter than 16 bytes are scanned with REPN; SCASB.
    28  // Longer slices are scanned bytewise up to the first 16-byte boundary,
    29  // then 16 bytes at a time with SSE compares, then bytewise for
    30  // whatever is left after the last 16-byte boundary.
    31  TEXT ·IndexByte(SB),7,$0
    32  	MOVQ s+0(FP), SI
    33  	MOVQ s_len+8(FP), BX
    34  	MOVB c+24(FP), AL
    35  	MOVQ SI, DI
    36  
    37  	CMPQ BX, $16
    38  	JLT small
    39  
    40  	// round up to first 16-byte boundary
    41  	TESTQ $15, SI
    42  	JZ aligned
    43  	MOVQ SI, CX
    44  	ANDQ $~15, CX
    45  	ADDQ $16, CX
    46  
    47  	// search the beginning; CX = bytes before the boundary (1..15),
    48  	// which is less than BX because BX >= 16 here
    49  	SUBQ SI, CX
    50  	REPN; SCASB
    51  	JZ success
    52  
    53  // DI is 16-byte aligned; get ready to search using SSE instructions
    54  aligned:
    55  	// round down to last 16-byte boundary
    56  	MOVQ BX, R11
    57  	ADDQ SI, R11
    58  	ANDQ $~15, R11
    59  
    60  	// shuffle X0 around so that each byte contains c; only byte 0 of
    61  	// AX (AL) survives the shuffle, so the rest of AX does not matter
    62  	MOVD AX, X0
    63  	PUNPCKLBW X0, X0
    64  	PUNPCKLBW X0, X0
    65  	PSHUFL $0, X0, X0
    66  	JMP condition
    67  
    68  sse:
    69  	// move the next 16-byte chunk of the buffer into X1
    70  	MOVO (DI), X1
    71  	// compare bytes in X0 to X1
    72  	PCMPEQB X0, X1
    73  	// take the top bit of each byte in X1 and put the result in DX
    74  	PMOVMSKB X1, DX
    75  	TESTL DX, DX
    76  	JNZ ssesuccess
    77  	ADDQ $16, DI
    78  
    79  condition:
    80  	// DI and R11 are addresses, so compare them as unsigned values
    81  	CMPQ DI, R11
    82  	JB sse
    83  
    84  	// search the end
    85  	MOVQ SI, CX
    86  	ADDQ BX, CX
    87  	SUBQ R11, CX
    88  	// if CX == 0, SCASB would not run, the zero flag set by SUBQ would
    89  	// survive, and we'd end up returning a false success
    90  	JZ failure
    91  	REPN; SCASB
    92  	JZ success
    93  
    94  failure:
    95  	MOVQ $-1, ret+32(FP)
    96  	RET
    97  
    98  // handle for lengths < 16
    99  small:
   100  	// When BX == 0, SCASB never executes and the flags are still those
   101  	// of CMPQ BX, $16 above (MOVQ leaves them alone), so ZF is clear
   102  	// and we fall through to the not-found return.
   103  	MOVQ BX, CX
   104  	REPN; SCASB
   105  	JZ success
   106  	MOVQ $-1, ret+32(FP)
   107  	RET
   108  
   109  // we've found the chunk containing the byte
   110  // now just figure out which specific byte it is
   111  ssesuccess:
   112  	// get the index of the least significant set bit; DX is nonzero and
   113  	// only its low 16 bits can be set, so the 32-bit form gives the same
   114  	// answer as a 16-bit scan without a partial-register write
   115  	BSFL DX, DX
   116  	SUBQ SI, DI
   117  	ADDQ DI, DX
   118  	MOVQ DX, ret+32(FP)
   119  	RET
   120  
   121  success:
   122  	// SCASB left DI one past the matching byte
   123  	SUBQ SI, DI
   124  	// 64-bit subtract: a 32-bit SUBL here would clear the upper half of
   125  	// DI and truncate indexes of 1<<32 or more
   126  	SUBQ $1, DI
   127  	MOVQ DI, ret+32(FP)
   128  	RET