//go:build !noasm && arm64
// AUTO-GENERATED BY GOAT -- DO NOT EDIT

// Source: github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/distancer/asm/l2_arm64.s
//
// Syntax: Go (Plan 9) assembler for arm64. The function body is emitted as raw
// A64 machine words (WORD directives); the trailing comment on each WORD is
// the disassembly of that word.
//
// ·l2 accumulates (a[i] - b[i])^2 over 32-bit float elements and stores the
// single float32 sum through res.
//
// Arguments (four 8-byte slots, frame $0-32):
//   a+0(FP)    -> R0/x0  address of the first float32 vector
//   b+8(FP)    -> R1/x1  address of the second float32 vector
//   res+16(FP) -> R2/x2  address that receives the float32 result
//   len+24(FP) -> R3/x3  address of the element count (loaded with a 64-bit
//                        ldr, but only the low 32 bits are used, as a signed int)
//
// Registers written: x8-x14, x29/x30 (saved and restored), v0-v7, v16-v22, flags.
//
// Register roles after the setup sequence:
//   w8  = len
//   w10 = len % 4 (signed remainder)
//   w9  = len - len % 4 (element count covered by the vector loops)
//   x11 = running element index
//   v0-v3 = four independent 4-lane partial sums
//
// WARNING: every branch below is a PC-relative offset baked into its WORD
// encoding; the LBB0_* labels are not referenced symbolically. Inserting,
// removing or reordering instructions silently breaks control flow.
//
// NOTE(review): the first instruction stores 16 bytes below the entry SP even
// though the TEXT directive declares a $0 frame -- confirm this is acceptable
// for the Go runtime configuration in use.
// Assumed Go-side stub (not visible here): func l2(a, b, res, len unsafe.Pointer).
TEXT ·l2(SB), $0-32
	MOVD a+0(FP), R0
	MOVD b+8(FP), R1
	MOVD res+16(FP), R2
	MOVD len+24(FP), R3

	// Setup: save fp/lr, load len, split it into w9 (multiple of 4) and w10 (remainder).
	WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
	WORD $0xf9400068 // ldr x8, [x3]
	WORD $0x910003fd // mov x29, sp
	WORD $0x6b0803e9 // negs w9, w8
	WORD $0x1200050a // and w10, w8, #0x3
	WORD $0x12000529 // and w9, w9, #0x3
	WORD $0x5a89454a // csneg w10, w10, w9, mi
	WORD $0x4b0a0109 // sub w9, w8, w10
	WORD $0x7100413f // cmp w9, #16
	WORD $0x540000ea // b.ge .LBB0_2

	// Fewer than 16 vectorisable elements: zero the accumulators and the index,
	// then skip the 16-wide loop.
	WORD $0x6f00e400 // movi v0.2d, #0000000000000000
	WORD $0x2a1f03eb // mov w11, wzr
	WORD $0x6f00e401 // movi v1.2d, #0000000000000000
	WORD $0x6f00e403 // movi v3.2d, #0000000000000000
	WORD $0x6f00e402 // movi v2.2d, #0000000000000000
	WORD $0x1400001a // b .LBB0_4

LBB0_2:
	// Prepare the 16-wide loop: zero accumulators, index = 0, x12/x13 = cursors into a/b.
	WORD $0x6f00e402 // movi v2.2d, #0000000000000000
	WORD $0xaa1f03eb // mov x11, xzr
	WORD $0x6f00e403 // movi v3.2d, #0000000000000000
	WORD $0xaa0003ec // mov x12, x0
	WORD $0x6f00e401 // movi v1.2d, #0000000000000000
	WORD $0xaa0103ed // mov x13, x1
	WORD $0x6f00e400 // movi v0.2d, #0000000000000000

LBB0_3:
	// 16 floats per iteration (4 x 4 lanes); repeats while another full block
	// of 16 still fits below w9 (x11 + 32 <= x9 before x11 is advanced).
	WORD $0x4cdf2984 // ld1 { v4.4s, v5.4s, v6.4s, v7.4s }, [x12], #64
	WORD $0x9100816e // add x14, x11, #32
	WORD $0x9100416b // add x11, x11, #16
	WORD $0xeb0901df // cmp x14, x9
	WORD $0x4cdf29b0 // ld1 { v16.4s, v17.4s, v18.4s, v19.4s }, [x13], #64
	WORD $0x4eb0d494 // fsub v20.4s, v4.4s, v16.4s
	WORD $0x4eb1d4b5 // fsub v21.4s, v5.4s, v17.4s
	WORD $0x4eb2d4d6 // fsub v22.4s, v6.4s, v18.4s
	WORD $0x4eb3d4e4 // fsub v4.4s, v7.4s, v19.4s
	WORD $0x6e34de85 // fmul v5.4s, v20.4s, v20.4s
	WORD $0x6e35dea6 // fmul v6.4s, v21.4s, v21.4s
	WORD $0x6e36dec7 // fmul v7.4s, v22.4s, v22.4s
	WORD $0x6e24dc84 // fmul v4.4s, v4.4s, v4.4s
	WORD $0x4e25d442 // fadd v2.4s, v2.4s, v5.4s
	WORD $0x4e26d463 // fadd v3.4s, v3.4s, v6.4s
	WORD $0x4e27d421 // fadd v1.4s, v1.4s, v7.4s
	WORD $0x4e24d400 // fadd v0.4s, v0.4s, v4.4s
	WORD $0x54fffde9 // b.ls .LBB0_3

LBB0_4:
	// 4-wide loop setup: skipped when the index has already reached w9.
	// x13/x14 = b/a cursors at byte offset index * 4.
	WORD $0x6b09017f // cmp w11, w9
	WORD $0x540001ea // b.ge .LBB0_7
	WORD $0x2a0b03eb // mov w11, w11
	WORD $0x2a0903ec // mov w12, w9
	WORD $0xd37ef56e // lsl x14, x11, #2
	WORD $0x93407d8c // sxtw x12, w12
	WORD $0x8b0e002d // add x13, x1, x14
	WORD $0x8b0e000e // add x14, x0, x14

LBB0_6:
	// 4 floats per iteration, accumulated into v2, until the index reaches w9.
	WORD $0x3cc105c4 // ldr q4, [x14], #16
	WORD $0x3cc105a5 // ldr q5, [x13], #16
	WORD $0x9100116b // add x11, x11, #4
	WORD $0xeb0c017f // cmp x11, x12
	WORD $0x4ea5d484 // fsub v4.4s, v4.4s, v5.4s
	WORD $0x6e24dc84 // fmul v4.4s, v4.4s, v4.4s
	WORD $0x4e24d442 // fadd v2.4s, v2.4s, v4.4s
	WORD $0x54ffff2b // b.lt .LBB0_6

LBB0_7:
	// Horizontal reduction of the four accumulators into s0. The cmp is
	// interleaved here; its flags are consumed by the b.lt after the reduction
	// (the floating-point adds in between do not write flags).
	WORD $0x6e22d442 // faddp v2.4s, v2.4s, v2.4s
	WORD $0x7100055f // cmp w10, #1
	WORD $0x6e23d463 // faddp v3.4s, v3.4s, v3.4s
	WORD $0x6e21d421 // faddp v1.4s, v1.4s, v1.4s
	WORD $0x6e20d400 // faddp v0.4s, v0.4s, v0.4s
	WORD $0x7e30d842 // faddp s2, v2.2s
	WORD $0x7e30d863 // faddp s3, v3.2s
	WORD $0x7e30d821 // faddp s1, v1.2s
	WORD $0x7e30d800 // faddp s0, v0.2s
	WORD $0x1e232842 // fadd s2, s2, s3
	WORD $0x1e212841 // fadd s1, s2, s1
	WORD $0x1e202820 // fadd s0, s1, s0
	WORD $0x5400066b // b.lt .LBB0_13

	// Tail: w10 >= 1 leftover elements starting at index x9.
	// x10 = max(len, x9 + 1) - x9 = number of leftover elements.
	WORD $0x93407d08 // sxtw x8, w8
	WORD $0x93407d29 // sxtw x9, w9
	WORD $0x9100052a // add x10, x9, #1
	WORD $0xeb08015f // cmp x10, x8
	WORD $0x9a89d50a // csinc x10, x8, x9, le
	WORD $0xcb09014a // sub x10, x10, x9
	WORD $0xf100215f // cmp x10, #8
	WORD $0x54000443 // b.lo .LBB0_12

	// 8-wide tail setup: x11 = leftover count rounded down to a multiple of 8.
	// NOTE(review): the leftover count equals len % 4 here, so this path and
	// LBB0_10 appear unreachable -- left untouched as generated code.
	WORD $0xd37ef52c // lsl x12, x9, #2
	WORD $0x927df14b // and x11, x10, #0xfffffffffffffff8
	WORD $0x9100418d // add x13, x12, #16
	WORD $0x8b090169 // add x9, x11, x9
	WORD $0x8b0d000c // add x12, x0, x13
	WORD $0x8b0d002d // add x13, x1, x13
	WORD $0xaa0b03ee // mov x14, x11

LBB0_10:
	// 8 floats per iteration; each squared lane is added to s0 one at a time.
	WORD $0xad7f8d81 // ldp q1, q3, [x12, #-16]
	WORD $0xf10021ce // subs x14, x14, #8
	WORD $0x9100818c // add x12, x12, #32
	WORD $0xad7f91a2 // ldp q2, q4, [x13, #-16]
	WORD $0x910081ad // add x13, x13, #32
	WORD $0x4ea2d421 // fsub v1.4s, v1.4s, v2.4s
	WORD $0x6e21dc21 // fmul v1.4s, v1.4s, v1.4s
	WORD $0x5e0c0422 // mov s2, v1.s[1]
	WORD $0x1e212800 // fadd s0, s0, s1
	WORD $0x5e140425 // mov s5, v1.s[2]
	WORD $0x5e1c0421 // mov s1, v1.s[3]
	WORD $0x1e222800 // fadd s0, s0, s2
	WORD $0x4ea4d462 // fsub v2.4s, v3.4s, v4.4s
	WORD $0x1e252800 // fadd s0, s0, s5
	WORD $0x6e22dc42 // fmul v2.4s, v2.4s, v2.4s
	WORD $0x1e212800 // fadd s0, s0, s1
	WORD $0x5e0c0441 // mov s1, v2.s[1]
	WORD $0x5e140443 // mov s3, v2.s[2]
	WORD $0x1e222800 // fadd s0, s0, s2
	WORD $0x1e212800 // fadd s0, s0, s1
	WORD $0x5e1c0441 // mov s1, v2.s[3]
	WORD $0x1e232800 // fadd s0, s0, s3
	WORD $0x1e212800 // fadd s0, s0, s1
	WORD $0x54fffd21 // b.ne .LBB0_10
	WORD $0xeb0b015f // cmp x10, x11
	WORD $0x54000140 // b.eq .LBB0_13

LBB0_12:
	// Scalar tail: one float per iteration until the index x9 reaches len (x8).
	WORD $0xd37ef52a // lsl x10, x9, #2
	WORD $0x91000529 // add x9, x9, #1
	WORD $0xeb08013f // cmp x9, x8
	WORD $0xbc6a6801 // ldr s1, [x0, x10]
	WORD $0xbc6a6822 // ldr s2, [x1, x10]
	WORD $0x1e223821 // fsub s1, s1, s2
	WORD $0x1e210821 // fmul s1, s1, s1
	WORD $0x1e212800 // fadd s0, s0, s1
	WORD $0x54ffff0b // b.lt .LBB0_12

LBB0_13:
	// Store the result through res, restore fp/lr and return.
	WORD $0xbd000040 // str s0, [x2]
	WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
	WORD $0xd65f03c0 // ret