github.com/parquet-go/parquet-go@v0.20.0/internal/bytealg/broadcast_amd64.s (about)

     1  //go:build !purego
     2  
     3  #include "textflag.h"
     4  
     5  // func broadcastAVX2(dst []byte, src byte)
        //
        // Sets every byte of dst to src.
        //
        // Frame: $0-25 = no local stack frame, 25 bytes of arguments
        // (dst slice header at offsets 0..23, src byte at offset 24).
        //
        // Register roles:
        //   AX = base address of dst
        //   BX = len(dst); also used as a descending index in the short paths
        //   CX = src, zero-extended (later replicated to 8 bytes in the init8 path)
        //   SI = ascending byte offset in the 64-byte loop
        //   DX = len(dst) rounded down to a multiple of 64
        //   Y0 = src replicated into all 32 bytes
        //
        // Three paths are selected by length:
        //   len <= 8        one byte per iteration (loop/test)
        //   9 <= len < 64   8-byte stores walking backwards from the end (init8/loop8)
        //   len >= 64       two 32-byte AVX2 stores per iteration, plus an overlapping tail
     6  TEXT ·broadcastAVX2(SB), NOSPLIT, $0-25
     7      MOVQ dst_base+0(FP), AX // AX = &dst[0]
     8      MOVQ dst_len+8(FP), BX // BX = len(dst)
     9      MOVBQZX src+24(FP), CX // CX = src, zero-extended to 64 bits
    10  
            // len <= 8 (including len == 0): fall back to the byte-at-a-time loop.
    11      CMPQ BX, $8
    12      JBE test
    13  
            // 8 < len < 64: use 8-byte stores.
    14      CMPQ BX, $64
    15      JB init8
    16  
            // len >= 64: AVX2 path.
    17      XORQ SI, SI // SI = 0, current byte offset into dst
    18      MOVQ BX, DX
    19      SHRQ $6, DX
    20      SHLQ $6, DX // DX = len with the low 6 bits cleared (multiple of 64, >= 64 here)
    21      MOVQ CX, X0 // place src in the low bytes of X0
    22      VPBROADCASTB X0, Y0 // replicate the low byte of X0 into all 32 bytes of Y0
    23  loop64:
            // Fill dst[SI : SI+64] with two unaligned 32-byte stores.
    24      VMOVDQU Y0, (AX)(SI*1)
    25      VMOVDQU Y0, 32(AX)(SI*1)
    26      ADDQ $64, SI
    27      CMPQ SI, DX
    28      JNE loop64 // DX is a non-zero multiple of 64, so SI reaches it exactly
            // Tail: rewrite the last 64 bytes, dst[len-64 : len]. This covers the
            // len % 64 bytes the loop did not reach; the overlap with bytes that
            // were already written is harmless because the value is the same.
    29      VMOVDQU Y0, -64(AX)(BX*1)
    30      VMOVDQU Y0, -32(AX)(BX*1)
    31      VZEROUPPER // clear upper YMM halves before returning (presumably to avoid AVX/SSE transition penalties in the caller)
    32      RET
    33  
    34  init8:
            // CX holds a value in 0..255, so multiplying by 0x0101010101010101
            // copies that byte into each of the 8 bytes of CX.
    35      MOVQ $0x0101010101010101, R8
    36      IMULQ R8, CX
    37  loop8:
            // Store 8 bytes at dst[BX-8 : BX], then move BX down by 8.
    38      MOVQ CX, -8(AX)(BX*1)
    39      SUBQ $8, BX
    40      CMPQ BX, $8
    41      JAE loop8 // unsigned: continue while at least 8 bytes remain below BX
            // Fewer than 8 bytes remain at the front; one store to dst[0:8] covers
            // them. This is in bounds because this path is only entered with len > 8.
    42      MOVQ CX, (AX)
    43      RET
    44  
    45  loop:
            // Byte loop: write the low byte of CX to dst[BX-1] and step backwards.
    46      MOVB CX, -1(AX)(BX*1)
    47      DECQ BX
    48  test:
    49      CMPQ BX, $0
    50      JNE loop // repeat until no bytes remain; an empty dst returns immediately
    51      RET