// Code generated by command: go run l2.go -out l2_amd64.s -stubs l2_stub_amd64.go. DO NOT EDIT.
//
// NOTE(review): this file is generator output. The VZEROUPPER before the
// final store and the removal of the dead scratch-register zeroing should be
// mirrored in l2.go so that regeneration does not revert them.

#include "textflag.h"

// func L2(x []float32, y []float32) float32
// Requires: AVX, FMA3, SSE
//
// L2 returns the sum over i in [0, len(x)) of (x[i]-y[i])^2, i.e. the squared
// Euclidean distance (no square root is taken).
//
// Only x_len is read; y_len is never consulted. The routine reads len(x)
// float32 values from y, so the caller must guarantee len(y) >= len(x).
//
// Frame layout ($0-52): x slice header at 0(FP), y slice header at 24(FP),
// float32 result at 48(FP). No stack frame is used.
//
// Register use:
//   AX, CX          cursors into x and y
//   DX              number of elements still to process
//   Y0, Y2, Y4, Y6  four independent 8-lane accumulators for the block loop
//   Y1, Y3, Y5, Y7  scratch holding the x-y differences in the block loop
//   X1              scalar accumulator for the tail loop (lane 0 only)
//   X3              scratch holding one x-y difference in the tail loop
//
// Operands follow Go assembler order (destination last), so
// "VSUBPS (CX), Y1, Y1" computes Y1 = Y1 - mem and
// "VFMADD231PS Y1, Y1, Y0" computes Y0 = Y1*Y1 + Y0.
TEXT ·L2(SB), NOSPLIT, $0-52
	MOVQ   x_base+0(FP), AX
	MOVQ   y_base+24(FP), CX
	MOVQ   x_len+8(FP), DX

	// Zero the accumulators. The scratch registers Y1/Y3/Y5/Y7 need no
	// initialisation: each is fully overwritten by a load (or, for X1, by the
	// VXORPS at "tail") before it is ever read.
	VXORPS Y0, Y0, Y0
	VXORPS Y2, Y2, Y2
	VXORPS Y4, Y4, Y4
	VXORPS Y6, Y6, Y6

blockloop:
	// Process 32 floats (4 x 8 lanes, 128 bytes) per iteration while at least
	// 32 elements remain.
	CMPQ        DX, $0x00000020
	JL          tail
	VMOVUPS     (AX), Y1
	VMOVUPS     32(AX), Y3
	VMOVUPS     64(AX), Y5
	VMOVUPS     96(AX), Y7
	VSUBPS      (CX), Y1, Y1
	VSUBPS      32(CX), Y3, Y3
	VSUBPS      64(CX), Y5, Y5
	VSUBPS      96(CX), Y7, Y7
	VFMADD231PS Y1, Y1, Y0
	VFMADD231PS Y3, Y3, Y2
	VFMADD231PS Y5, Y5, Y4
	VFMADD231PS Y7, Y7, Y6
	ADDQ        $0x00000080, AX
	ADDQ        $0x00000080, CX
	SUBQ        $0x00000020, DX
	JMP         blockloop

tail:
	// Fresh scalar accumulator; the VEX-encoded 128-bit form also clears the
	// upper half of Y1 that the block loop used as scratch.
	VXORPS X1, X1, X1

tailloop:
	// Handle the remaining (< 32) elements one float at a time.
	CMPQ        DX, $0x00000000
	JE          reduce
	VMOVSS      (AX), X3
	VSUBSS      (CX), X3, X3
	VFMADD231SS X3, X3, X1
	ADDQ        $0x00000004, AX
	ADDQ        $0x00000004, CX
	DECQ        DX
	JMP         tailloop

reduce:
	// Fold the four 8-lane accumulators into Y0.
	VADDPS       Y0, Y2, Y0
	VADDPS       Y4, Y6, Y4
	VADDPS       Y0, Y4, Y0

	// Fold the upper 128 bits of Y0 onto the lower 128 bits.
	VEXTRACTF128 $0x01, Y0, X2
	VADDPS       X0, X2, X0

	// Add the tail sum (lane 0 of X1; its other lanes are zero).
	VADDPS       X0, X1, X0

	// Two horizontal adds leave the total of all four lanes in lane 0.
	VHADDPS      X0, X0, X0
	VHADDPS      X0, X0, X0

	// Clear the dirty upper YMM halves before executing legacy-SSE code (the
	// MOVSS below and the SSE float code of the Go caller) to avoid AVX/SSE
	// transition penalties. VZEROUPPER leaves the low 128 bits, and therefore
	// the result in X0, untouched.
	VZEROUPPER
	MOVSS        X0, ret+48(FP)
	RET