github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/distancer/asm/l2_amd64.s (about)

     1  // Code generated by command: go run l2.go -out l2_amd64.s -stubs l2_stub_amd64.go. DO NOT EDIT.
     2  
     3  #include "textflag.h"
     4  
     5  // func L2(x []float32, y []float32) float32
     6  // Requires: AVX, FMA3, SSE
     7  TEXT ·L2(SB), NOSPLIT, $0-52
     8  	MOVQ   x_base+0(FP), AX
     9  	MOVQ   y_base+24(FP), CX
    10  	MOVQ   x_len+8(FP), DX
    11  	VXORPS Y0, Y0, Y0
    12  	VXORPS Y1, Y1, Y1
    13  	VXORPS Y2, Y2, Y2
    14  	VXORPS Y3, Y3, Y3
    15  	VXORPS Y4, Y4, Y4
    16  	VXORPS Y5, Y5, Y5
    17  	VXORPS Y6, Y6, Y6
    18  	VXORPS Y7, Y7, Y7
    19  
    20  blockloop:
    21  	CMPQ        DX, $0x00000020
    22  	JL          tail
    23  	VMOVUPS     (AX), Y1
    24  	VMOVUPS     32(AX), Y3
    25  	VMOVUPS     64(AX), Y5
    26  	VMOVUPS     96(AX), Y7
    27  	VSUBPS      (CX), Y1, Y1
    28  	VSUBPS      32(CX), Y3, Y3
    29  	VSUBPS      64(CX), Y5, Y5
    30  	VSUBPS      96(CX), Y7, Y7
    31  	VFMADD231PS Y1, Y1, Y0
    32  	VFMADD231PS Y3, Y3, Y2
    33  	VFMADD231PS Y5, Y5, Y4
    34  	VFMADD231PS Y7, Y7, Y6
    35  	ADDQ        $0x00000080, AX
    36  	ADDQ        $0x00000080, CX
    37  	SUBQ        $0x00000020, DX
    38  	JMP         blockloop
    39  
    40  tail:
    41  	VXORPS X1, X1, X1
    42  
    43  tailloop:
    44  	CMPQ        DX, $0x00000000
    45  	JE          reduce
    46  	VMOVSS      (AX), X3
    47  	VSUBSS      (CX), X3, X3
    48  	VFMADD231SS X3, X3, X1
    49  	ADDQ        $0x00000004, AX
    50  	ADDQ        $0x00000004, CX
    51  	DECQ        DX
    52  	JMP         tailloop
    53  
    54  reduce:
    55  	VADDPS       Y0, Y2, Y0
    56  	VADDPS       Y4, Y6, Y4
    57  	VADDPS       Y0, Y4, Y0
    58  	VEXTRACTF128 $0x01, Y0, X2
    59  	VADDPS       X0, X2, X0
    60  	VADDPS       X0, X1, X0
    61  	VHADDPS      X0, X0, X0
    62  	VHADDPS      X0, X0, X0
    63  	MOVSS        X0, ret+48(FP)
    64  	RET