//go:build !purego

// Origin: github.com/parquet-go/parquet-go@v0.20.0/column_buffer_amd64.s

#include "textflag.h"

// func broadcastRangeInt32AVX2(dst []int32, base int32)
//
// Fills dst with the increasing sequence base, base+1, base+2, ...
// (dst[i] = base + i, with 32-bit wraparound).
//
// Blocks of 8 elements are written with AVX2; any remaining 0-7 elements are
// written one at a time by the scalar tail loop. Inputs shorter than 8
// elements skip the vector setup entirely.
//
// Register usage:
//   AX = pointer to dst[0]
//   BX = len(dst)
//   CX = base
//   SI = index of the next element to write
//   DI = len(dst) rounded down to a multiple of 8 (vector path only)
//   DX = scratch for the scalar tail
//
// ·range0n8 is defined outside this file; per the operand comments below it
// is expected to hold the int32 values 0..7 followed by the value 8 at byte
// offset 32.
TEXT ·broadcastRangeInt32AVX2(SB), NOSPLIT, $0-28
	MOVQ dst_base+0(FP), AX
	MOVQ dst_len+8(FP), BX
	MOVL base+24(FP), CX
	XORQ SI, SI

	// Fewer than 8 elements: go straight to the scalar loop.
	CMPQ BX, $8
	JB test1x4

	VMOVDQU ·range0n8(SB), Y0         // [0,1,2,3,4,5,6,7]
	VPBROADCASTD ·range0n8+32(SB), Y1 // [8,8,8,8,8,8,8,8]
	VPBROADCASTD base+24(FP), Y2      // [base...]
	VPADDD Y2, Y0, Y0                 // [base,base+1,...]

	// DI = len(dst) with the low 3 bits cleared, i.e. the number of elements
	// covered by whole 8-lane stores.
	MOVQ BX, DI
	SHRQ $3, DI
	SHLQ $3, DI
	JMP test8x4
loop8x4:
	VMOVDQU Y0, (AX)(SI*4) // store 8 consecutive values at dst[SI:SI+8]
	VPADDD Y1, Y0, Y0      // advance every lane by 8 for the next block
	ADDQ $8, SI
test8x4:
	CMPQ SI, DI
	JNE loop8x4
	VZEROUPPER             // clear the upper YMM halves before leaving AVX code
	JMP test1x4

	// Scalar tail: dst[SI] = base + SI for the remaining elements.
	//
	// This previously multiplied base by (SI+1), which disagrees with the
	// base+i sequence produced by the vector loop above and yielded wrong
	// values whenever len(dst) was not a multiple of 8.
loop1x4:
	MOVL CX, DX            // DX = base
	ADDL SI, DX            // DX = base + i (low 32 bits of the index)
	MOVL DX, (AX)(SI*4)    // dst[i] = base + i
	INCQ SI
test1x4:
	CMPQ SI, BX
	JNE loop1x4
	RET

// func writePointersBE128(values [][16]byte, rows sparse.Array)
//
// For each of the rows_array_len entries of rows, reads a pointer-sized value
// from the current row address. If that pointer is non-nil, the 16 bytes it
// points to are copied to the next slot of values; if it is nil, the slot is
// filled with 16 zero bytes.
//
// The row address starts at rows_array_ptr and advances by rows_array_off
// bytes per iteration; the destination advances by 16 bytes per iteration.
// No bounds check is performed against len(values) here.
//
// Register usage:
//   AX = pointer to the current destination slot in values
//   BX = address of the current row entry
//   CX = number of rows
//   DX = byte distance between consecutive row entries
//   SI = number of rows processed so far
//   DI = pointer loaded from the current row entry
//   X0 = 16-byte value to store (zero when the pointer is nil)
TEXT ·writePointersBE128(SB), NOSPLIT, $0-48
	MOVQ values_base+0(FP), AX
	MOVQ rows_array_ptr+24(FP), BX
	MOVQ rows_array_len+32(FP), CX
	MOVQ rows_array_off+40(FP), DX

	XORQ SI, SI
	JMP test
loop:
	PXOR X0, X0     // default to an all-zero value for nil pointers
	MOVQ (BX), DI   // *[16]byte
	CMPQ DI, $0
	JE next
	MOVOU (DI), X0  // non-nil: load the 16 bytes being pointed to
next:
	MOVOU X0, (AX)  // write the value (or zeros) to the destination slot
	ADDQ $16, AX
	ADDQ DX, BX
	INCQ SI
test:
	CMPQ SI, CX
	JNE loop
	RET