// Listing origin: github.com/FISCO-BCOS/crypto@v0.0.0-20200202032121-bd8ab0b5d4f1/internal/bytealg/indexbyte_amd64p32.s

// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "textflag.h"

// ·IndexByte loads a base pointer, a length and the byte to look for from
// its argument frame, runs indexbytebody<>, and stores the resulting index
// (or -1) in ret.
//
// Frame: $0-20 = no local stack, 20 bytes of arguments + result.
//	b_base+0(FP)	32-bit pointer to the first byte
//	b_len+4(FP)	32-bit length in bytes
//	c+12(FP)	byte sought (offset 8 is not read here; presumably the
//			slice capacity - confirm against the Go declaration)
//	ret+16(FP)	32-bit result
// NOTE(review): presumably declared in Go as
// func IndexByte(b []byte, c byte) int; the declaration is not in this file.
TEXT ·IndexByte(SB),NOSPLIT,$0-20
	MOVL	b_base+0(FP), SI	// SI = start of data
	MOVL	b_len+4(FP), BX		// BX = number of bytes to search
	MOVB	c+12(FP), AL		// AL = byte sought (rest of AX is left as-is)
	CALL	indexbytebody<>(SB)
	MOVL	AX, ret+16(FP)		// AX = index of first match, or -1
	RET

// ·IndexByteString is the same wrapper for a (pointer, length) pair followed
// directly by the byte sought.
//
// Frame: $0-20 = no local stack, 20 bytes of arguments + result.
//	s_base+0(FP)	32-bit pointer to the first byte
//	s_len+4(FP)	32-bit length in bytes
//	c+8(FP)		byte sought
//	ret+16(FP)	32-bit result
// NOTE(review): presumably declared in Go as
// func IndexByteString(s string, c byte) int; the declaration is not in this file.
TEXT ·IndexByteString(SB),NOSPLIT,$0-20
	MOVL	s_base+0(FP), SI	// SI = start of data
	MOVL	s_len+4(FP), BX		// BX = number of bytes to search
	MOVB	c+8(FP), AL		// AL = byte sought (rest of AX is left as-is)
	CALL	indexbytebody<>(SB)
	MOVL	AX, ret+16(FP)		// AX = index of first match, or -1
	RET

// indexbytebody<> finds the first occurrence of a byte in a buffer.
//
// Strategy:
//   - fewer than 16 bytes: plain REPN SCASB over the whole buffer;
//   - otherwise: REPN SCASB up to the next 16-byte boundary, then 16-byte
//     SSE compares over the aligned middle, then REPN SCASB over whatever
//     is left after the last 16-byte boundary.
//
// input:
//   SI: data
//   BX: data len
//   AL: byte sought
// output:
//   AX: index of the first matching byte relative to SI, or -1 if none
// overwritten:
//   CX, DX, DI, R11, X0, X1 and the flags (SI and BX are only read)
//
// NOTE(review): SCASB is relied on to advance DI upward, i.e. the direction
// flag is assumed clear on entry - confirm this is guaranteed by the callers.
TEXT indexbytebody<>(SB),NOSPLIT,$0
	// DI is the scan cursor; SI stays at the start so an index can be
	// computed as DI-SI later.
	MOVL	SI, DI

	// The flags set by this compare are still live at the JZ in "small"
	// when BX == 0: REPN SCASB then runs zero iterations and leaves the
	// flags alone, and ZF is clear here (0 != 16), so an empty buffer
	// correctly falls through to the -1 return.
	CMPL	BX, $16
	JLT	small

	// round up to first 16-byte boundary
	TESTL	$15, SI
	JZ	aligned
	MOVL	SI, CX
	ANDL	$~15, CX
	ADDL	$16, CX			// CX = first 16-byte boundary above SI

	// search the beginning
	SUBL	SI, CX			// CX = bytes before that boundary (1..15)
	REPN; SCASB			// stops early with ZF set on a match
	JZ	success

	// DI is 16-byte aligned; get ready to search using SSE instructions
aligned:
	// round down to last 16-byte boundary
	MOVL	BX, R11
	ADDL	SI, R11
	ANDL	$~15, R11		// R11 = (SI+BX) &^ 15, end of the aligned region

	// shuffle X0 around so that each byte contains c
	MOVD	AX, X0			// low dword of X0 = AX; only its byte 0 (AL) matters
	PUNPCKLBW	X0, X0		// bytes 0-1 = c
	PUNPCKLBW	X0, X0		// bytes 0-3 = c
	PSHUFL	$0, X0, X0		// copy the low dword into all four dwords
	JMP	condition

sse:
	// move the next 16-byte chunk of the buffer into X1
	MOVO	(DI), X1
	// compare bytes in X0 to X1
	PCMPEQB	X0, X1			// each byte of X1 becomes 0xFF where it equalled c, else 0
	// take the top bit of each byte in X1 and put the result in DX
	PMOVMSKB	X1, DX
	TESTL	DX, DX
	JNZ	ssesuccess		// leaves DI at the start of the matching chunk
	ADDL	$16, DI

condition:
	// Loop while aligned chunks remain; DI only ever moves in steps of 16
	// from one boundary towards R11.
	CMPL	DI, R11
	JNE	sse

	// search the end
	MOVL	SI, CX
	ADDL	BX, CX
	SUBL	R11, CX			// CX = bytes after the last boundary (0..15)
	// if CX == 0, the zero flag will be set and we'll end up
	// returning a false success
	JZ	failure
	REPN; SCASB
	JZ	success

failure:
	MOVL	$-1, AX
	RET

	// handle for lengths < 16
small:
	MOVL	BX, CX			// MOVL leaves the flags from CMPL BX, $16 untouched
	REPN; SCASB
	JZ	success
	MOVL	$-1, AX
	RET

	// we've found the chunk containing the byte
	// now just figure out which specific byte it is
ssesuccess:
	// get the index of the least significant set bit
	BSFW	DX, DX			// DX = offset of the first match inside the chunk
	SUBL	SI, DI			// DI = offset of the chunk from the start
	ADDL	DI, DX
	MOVL	DX, AX
	RET

success:
	// SCASB has already stepped DI past the matching byte, hence the -1.
	SUBL	SI, DI
	SUBL	$1, DI
	MOVL	DI, AX
	RET