github.com/hikaru7719/go@v0.0.0-20181025140707-c8b2ac68906a/src/internal/bytealg/indexbyte_amd64p32.s (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
     8  TEXT ·IndexByte(SB),NOSPLIT,$0-20
     9  	MOVL b_base+0(FP), SI
    10  	MOVL b_len+4(FP), BX
    11  	MOVB c+12(FP), AL
    12  	CALL indexbytebody<>(SB)
    13  	MOVL AX, ret+16(FP)
    14  	RET
    15  
    16  TEXT ·IndexByteString(SB),NOSPLIT,$0-20
    17  	MOVL s_base+0(FP), SI
    18  	MOVL s_len+4(FP), BX
    19  	MOVB c+8(FP), AL
    20  	CALL indexbytebody<>(SB)
    21  	MOVL AX, ret+16(FP)
    22  	RET
    23  
    24  TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
    25  	FUNCDATA $0, ·IndexByte·args_stackmap(SB)
    26  	MOVL b_base+0(FP), SI
    27  	MOVL b_len+4(FP), BX
    28  	MOVB c+12(FP), AL
    29  	CALL indexbytebody<>(SB)
    30  	MOVL AX, ret+16(FP)
    31  	RET
    32  
    33  TEXT strings·IndexByte(SB),NOSPLIT,$0-20
    34  	FUNCDATA $0, ·IndexByteString·args_stackmap(SB)
    35  	MOVL s_base+0(FP), SI
    36  	MOVL s_len+4(FP), BX
    37  	MOVB c+8(FP), AL
    38  	CALL indexbytebody<>(SB)
    39  	MOVL AX, ret+16(FP)
    40  	RET
    41  
    42  // input:
    43  //   SI: data
    44  //   BX: data len
    45  //   AL: byte sought
    46  // output:
    47  //   AX
    48  TEXT indexbytebody<>(SB),NOSPLIT,$0
    49  	MOVL SI, DI
    50  
    51  	CMPL BX, $16
    52  	JLT small
    53  
    54  	// round up to first 16-byte boundary
    55  	TESTL $15, SI
    56  	JZ aligned
    57  	MOVL SI, CX
    58  	ANDL $~15, CX
    59  	ADDL $16, CX
    60  
    61  	// search the beginning
    62  	SUBL SI, CX
    63  	REPN; SCASB
    64  	JZ success
    65  
    66  // DI is 16-byte aligned; get ready to search using SSE instructions
    67  aligned:
    68  	// round down to last 16-byte boundary
    69  	MOVL BX, R11
    70  	ADDL SI, R11
    71  	ANDL $~15, R11
    72  
    73  	// shuffle X0 around so that each byte contains c
    74  	MOVD AX, X0
    75  	PUNPCKLBW X0, X0
    76  	PUNPCKLBW X0, X0
    77  	PSHUFL $0, X0, X0
    78  	JMP condition
    79  
    80  sse:
    81  	// move the next 16-byte chunk of the buffer into X1
    82  	MOVO (DI), X1
    83  	// compare bytes in X0 to X1
    84  	PCMPEQB X0, X1
    85  	// take the top bit of each byte in X1 and put the result in DX
    86  	PMOVMSKB X1, DX
    87  	TESTL DX, DX
    88  	JNZ ssesuccess
    89  	ADDL $16, DI
    90  
    91  condition:
    92  	CMPL DI, R11
    93  	JNE sse
    94  
    95  	// search the end
    96  	MOVL SI, CX
    97  	ADDL BX, CX
    98  	SUBL R11, CX
    99  	// if CX == 0, the zero flag will be set and we'll end up
   100  	// returning a false success
   101  	JZ failure
   102  	REPN; SCASB
   103  	JZ success
   104  
   105  failure:
   106  	MOVL $-1, AX
   107  	RET
   108  
   109  // handle for lengths < 16
   110  small:
   111  	MOVL BX, CX
   112  	REPN; SCASB
   113  	JZ success
   114  	MOVL $-1, AX
   115  	RET
   116  
   117  // we've found the chunk containing the byte
   118  // now just figure out which specific byte it is
   119  ssesuccess:
   120  	// get the index of the least significant set bit
   121  	BSFW DX, DX
   122  	SUBL SI, DI
   123  	ADDL DI, DX
   124  	MOVL DX, AX
   125  	RET
   126  
   127  success:
   128  	SUBL SI, DI
   129  	SUBL $1, DI
   130  	MOVL DI, AX
   131  	RET