github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/distancer/asm/dot_amd64.s (about)

     1  // Code generated by command: go run dot.go -out dot.s -stubs dot_stub.go. DO NOT EDIT.
     2  
     3  #include "textflag.h"
     4  
     5  // func Dot(x []float32, y []float32) float32
     6  // Requires: AVX, FMA3, SSE
     7  //
     8  // Dot returns the sum of x[i]*y[i] over the first len(x) elements.
     9  //
    10  // Only x_len is read; y_len is never consulted, so the caller must
    11  // guarantee len(y) >= len(x) or the loads through CX run past the end of y.
    12  //
    13  // Register roles:
    14  //   AX    - cursor into x
    15  //   CX    - cursor into y
    16  //   DX    - number of float32 elements still to process
    17  //   Y0-Y3 - four independent 8-lane partial sums (block loop)
    18  //   X4    - scalar partial sum of the tail elements (lane 0 only)
    19  //
    20  // NOTE: the file header marks this file as generated by dot.go. The
    21  // VZEROUPPER before the final store must also be emitted by the generator,
    22  // otherwise it is lost on regeneration.
    23  TEXT ·Dot(SB), NOSPLIT, $0-52
    24  	MOVQ   x_base+0(FP), AX
    25  	MOVQ   y_base+24(FP), CX
    26  	MOVQ   x_len+8(FP), DX
    27  	VXORPS Y0, Y0, Y0
    28  	VXORPS Y1, Y1, Y1
    29  	VXORPS Y2, Y2, Y2
    30  	VXORPS Y3, Y3, Y3
    31
    32  blockloop:
    33  	// Main loop: 32 floats (4 registers x 8 lanes, 128 bytes) per pass.
    34  	CMPQ        DX, $0x00000020
    35  	JL          tail
    36  	VMOVUPS     (AX), Y4
    37  	VMOVUPS     32(AX), Y5
    38  	VMOVUPS     64(AX), Y6
    39  	VMOVUPS     96(AX), Y7
    40  	VFMADD231PS (CX), Y4, Y0
    41  	VFMADD231PS 32(CX), Y5, Y1
    42  	VFMADD231PS 64(CX), Y6, Y2
    43  	VFMADD231PS 96(CX), Y7, Y3
    44  	ADDQ        $0x00000080, AX
    45  	ADDQ        $0x00000080, CX
    46  	SUBQ        $0x00000020, DX
    47  	JMP         blockloop
    48
    49  tail:
    50  	// Fewer than 32 elements remain; accumulate them one by one in X4.
    51  	VXORPS X4, X4, X4
    52
    53  tailloop:
    54  	CMPQ        DX, $0x00000000
    55  	JE          reduce
    56  	VMOVSS      (AX), X5
    57  	VFMADD231SS (CX), X5, X4
    58  	ADDQ        $0x00000004, AX
    59  	ADDQ        $0x00000004, CX
    60  	DECQ        DX
    61  	JMP         tailloop
    62
    63  reduce:
    64  	// Fold Y0-Y3 into Y0, then the high 128 bits of Y0 into the low 128.
    65  	VADDPS       Y0, Y1, Y0
    66  	VADDPS       Y2, Y3, Y2
    67  	VADDPS       Y0, Y2, Y0
    68  	VEXTRACTF128 $0x01, Y0, X1
    69  	VADDPS       X0, X1, X0
    70  	// Add the tail sum (only lane 0 of X4 is non-zero), then sum the four
    71  	// lanes horizontally so lane 0 of X0 holds the final result.
    72  	VADDPS       X0, X4, X0
    73  	VHADDPS      X0, X0, X0
    74  	VHADDPS      X0, X0, X0
    75  	// Clear the upper halves of all YMM registers before the legacy-SSE
    76  	// store and before returning, to avoid AVX/SSE transition penalties in
    77  	// this function and in the caller. The low 128 bits of X0 are preserved.
    78  	VZEROUPPER
    79  	MOVSS        X0, ret+48(FP)
    80  	RET