github.com/parquet-go/parquet-go@v0.20.0/column_buffer_amd64.s (about)

     1  //go:build !purego
     2  
     3  #include "textflag.h"
     4  
     5  // func broadcastRangeInt32AVX2(dst []int32, base int32)
     6  TEXT ·broadcastRangeInt32AVX2(SB), NOSPLIT, $0-28
     7      MOVQ dst_base+0(FP), AX
     8      MOVQ dst_len+8(FP), BX
     9      MOVL base+24(FP), CX
    10      XORQ SI, SI
    11  
    12      CMPQ BX, $8
    13      JB test1x4
    14  
    15      VMOVDQU ·range0n8(SB), Y0         // [0,1,2,3,4,5,6,7]
    16      VPBROADCASTD ·range0n8+32(SB), Y1 // [8,8,8,8,8,8,8,8]
    17      VPBROADCASTD base+24(FP), Y2      // [base...]
    18      VPADDD Y2, Y0, Y0                 // [base,base+1,...]
    19  
    20      MOVQ BX, DI
    21      SHRQ $3, DI
    22      SHLQ $3, DI
    23      JMP test8x4
    24  loop8x4:
    25      VMOVDQU Y0, (AX)(SI*4)
    26      VPADDD Y1, Y0, Y0
    27      ADDQ $8, SI
    28  test8x4:
    29      CMPQ SI, DI
    30      JNE loop8x4
    31      VZEROUPPER
    32      JMP test1x4
    33  
    34  loop1x4:
    35      INCQ SI
    36      MOVL CX, DX
    37      IMULL SI, DX
    38      MOVL DX, -4(AX)(SI*4)
    39  test1x4:
    40      CMPQ SI, BX
    41      JNE loop1x4
    42      RET
    43  
    44  // func writePointersBE128(values [][16]byte, rows sparse.Array)
    45  TEXT ·writePointersBE128(SB), NOSPLIT, $0-48
    46      MOVQ values_base+0(FP), AX
    47      MOVQ rows_array_ptr+24(FP), BX
    48      MOVQ rows_array_len+32(FP), CX
    49      MOVQ rows_array_off+40(FP), DX
    50  
    51      XORQ SI, SI
    52      JMP test
    53  loop:
    54      PXOR X0, X0
    55      MOVQ (BX), DI // *[16]byte
    56      CMPQ DI, $0
    57      JE next
    58      MOVOU (DI), X0
    59  next:
    60      MOVOU X0, (AX)
    61      ADDQ $16, AX
    62      ADDQ DX, BX
    63      INCQ SI
    64  test:
    65      CMPQ SI, CX
    66      JNE loop
    67      RET