// Code generated by command: go run dot.go -out dot.s -stubs dot_stub.go. DO NOT EDIT.

// Origin: github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/distancer/asm/dot_amd64.s
//
// NOTE(review): the VZEROUPPER in the reduce section was added by hand. Mirror
// it in the generator (dot.go) so that it survives the next regeneration.

#include "textflag.h"

// func Dot(x []float32, y []float32) float32
// Requires: AVX, FMA3, SSE
//
// Dot returns the sum of x[i]*y[i] for i in [0, len(x)).
//
// Only len(x) is read; len(y) is never consulted. The caller must therefore
// guarantee len(y) >= len(x), otherwise the routine reads past the end of y.
// An empty x yields 0.
//
// Register use:
//   AX     cursor into x
//   CX     cursor into y
//   DX     number of float32 elements still to process
//   Y0-Y3  four independent 8-lane partial sums (block loop)
//   Y4-Y7  scratch for the x values loaded in the block loop
//   X4     scalar partial sum for the tail (lane 0 only; other lanes stay zero)
//   X5     scratch for the x value loaded in the tail loop
TEXT ·Dot(SB), NOSPLIT, $0-52
	MOVQ   x_base+0(FP), AX
	MOVQ   y_base+24(FP), CX
	MOVQ   x_len+8(FP), DX

	// Clear the four vector accumulators.
	VXORPS Y0, Y0, Y0
	VXORPS Y1, Y1, Y1
	VXORPS Y2, Y2, Y2
	VXORPS Y3, Y3, Y3

blockloop:
	// Main loop: 32 floats (4 x 8 lanes, 128 bytes) per iteration. Fall through
	// to the scalar tail once fewer than 32 elements remain.
	CMPQ        DX, $0x00000020
	JL          tail
	VMOVUPS     (AX), Y4
	VMOVUPS     32(AX), Y5
	VMOVUPS     64(AX), Y6
	VMOVUPS     96(AX), Y7

	// Yn += x_chunk * y_chunk, with the y operand read straight from memory.
	VFMADD231PS (CX), Y4, Y0
	VFMADD231PS 32(CX), Y5, Y1
	VFMADD231PS 64(CX), Y6, Y2
	VFMADD231PS 96(CX), Y7, Y3
	ADDQ        $0x00000080, AX
	ADDQ        $0x00000080, CX
	SUBQ        $0x00000020, DX
	JMP         blockloop

tail:
	// Y4 was used as scratch above; reset it before it becomes the tail sum.
	VXORPS X4, X4, X4

tailloop:
	// Remaining 0..31 elements, one float at a time.
	CMPQ        DX, $0x00000000
	JE          reduce
	VMOVSS      (AX), X5
	VFMADD231SS (CX), X5, X4
	ADDQ        $0x00000004, AX
	ADDQ        $0x00000004, CX
	DECQ        DX
	JMP         tailloop

reduce:
	// Fold the four 8-lane accumulators into Y0.
	VADDPS       Y0, Y1, Y0
	VADDPS       Y2, Y3, Y2
	VADDPS       Y0, Y2, Y0

	// Add the upper 128 bits of Y0 onto the lower 128 bits.
	VEXTRACTF128 $0x01, Y0, X1
	VADDPS       X0, X1, X0

	// Merge the scalar tail sum (lane 0 of X4; its other lanes are zero).
	VADDPS       X0, X4, X0

	// Two horizontal adds leave the sum of all four lanes in lane 0.
	VHADDPS      X0, X0, X0
	VHADDPS      X0, X0, X0

	// The MOVSS below is a legacy-SSE encoding, and the Go code this routine
	// returns to typically uses legacy-SSE float instructions as well. Clear
	// the upper halves of the YMM registers first so that neither pays the
	// AVX/SSE transition penalty. The low 128 bits of X0 are left untouched.
	VZEROUPPER
	MOVSS        X0, ret+48(FP)
	RET