// Origin: github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/internal/bytealg/broadcast_amd64.s

//go:build !purego

#include "textflag.h"

// func broadcastAVX2(dst []byte, src byte)
//
// broadcastAVX2 stores the byte src into every element of dst.
//
// Three strategies are used depending on len(dst):
//   - len <= 8:       one byte store per element, from the end to the start.
//   - 9 <= len < 64:  8-byte stores of src replicated into a 64-bit word,
//                     from the end to the start, finishing with one store at
//                     offset 0 that may overlap bytes already written.
//   - len >= 64:      64-byte blocks written with two 32-byte vector stores,
//                     then the last 64 bytes of dst are written again to
//                     cover any remainder.
//
// Only the len >= 64 path executes AVX/AVX2 instructions.
// NOTE(review): presumably the Go caller checks for AVX2 support before
// dispatching here - confirm against the call site.
//
// Register use:
//   DI  = &dst[0]
//   SI  = len(dst), also used as the descending index in the scalar paths
//   DX  = src (zero-extended), later src replicated in all 8 bytes
//   R9  = len(dst) rounded down to a multiple of 64
//   R10 = ascending byte offset in the vector loop
//   R11 = 0x0101010101010101 multiplier
//   Y1  = src replicated in all 32 bytes
TEXT ·broadcastAVX2(SB), NOSPLIT, $0-25
	MOVQ dst_base+0(FP), DI
	MOVQ dst_len+8(FP), SI
	MOVBQZX src+24(FP), DX

	CMPQ SI, $8
	JA wide

	// len <= 8: nothing to do for an empty slice, otherwise write
	// dst[len-1], dst[len-2], ..., dst[0] one byte at a time.
	TESTQ SI, SI
	JZ done
byteStore:
	MOVB DX, -1(DI)(SI*1)
	DECQ SI
	JNZ byteStore
done:
	RET

wide:
	CMPQ SI, $64
	JAE vector

	// 9 <= len < 64: multiplying the zero-extended byte by
	// 0x0101010101010101 copies it into each of the 8 bytes of DX.
	MOVQ $0x0101010101010101, R11
	IMULQ R11, DX
wordStore:
	// Write the 8 bytes ending at offset SI, then step back by 8 while at
	// least 8 bytes remain in front of the region already written.
	MOVQ DX, -8(DI)(SI*1)
	SUBQ $8, SI
	CMPQ SI, $8
	JAE wordStore
	// Cover the remaining 0..7 leading bytes with a store at offset 0; it
	// stays in bounds because len > 8 on this path.
	MOVQ DX, (DI)
	RET

vector:
	// len >= 64: R9 = len &^ 63, the number of bytes handled in whole
	// 64-byte blocks (at least 64 here, so the loop runs at least once).
	MOVQ SI, R9
	ANDQ $-64, R9
	XORQ R10, R10
	MOVQ DX, X1
	VPBROADCASTB X1, Y1
blockStore:
	VMOVDQU Y1, (DI)(R10*1)
	VMOVDQU Y1, 32(DI)(R10*1)
	ADDQ $64, R10
	CMPQ R10, R9
	JNE blockStore
	// Rewrite the final 64 bytes of dst; this fills the len % 64 tail and
	// overlaps bytes already written by the loop.
	VMOVDQU Y1, -64(DI)(SI*1)
	VMOVDQU Y1, -32(DI)(SI*1)
	// Zero the upper halves of the YMM registers before returning.
	VZEROUPPER
	RET