github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/distancer/asm/dot_arm64.s (about)

     1  //go:build !noasm && arm64
     2  // AUTO-GENERATED BY GOAT -- DO NOT EDIT
     3  
     4  TEXT ·dot(SB), $0-32 // dot: sums a[i]*b[i] over *len float32 elements and stores the float32 total at *res (frame size 0, 32 bytes of arguments)
     5  	MOVD a+0(FP), R0 // x0 = a, base address of the first float32 array
     6  	MOVD b+8(FP), R1 // x1 = b, base address of the second float32 array
     7  	MOVD res+16(FP), R2 // x2 = res, address the float32 result is written to
     8  	MOVD len+24(FP), R3 // x3 = len, address of the element count (dereferenced below)
     9  	WORD $0xa9bf7bfd    // stp	x29, x30, [sp, #-16]!  ; save frame pointer and link register
    10  	WORD $0xf9400068    // ldr	x8, [x3]  ; x8 = *len; only the low 32 bits (w8) are used from here on
    11  	WORD $0x910003fd    // mov	x29, sp
    12  	WORD $0x6b0803e9    // negs	w9, w8  ; w9 = -len, flags feed the csneg below
    13  	WORD $0x1200050a    // and	w10, w8, #0x3  ; len & 3
    14  	WORD $0x12000529    // and	w9, w9, #0x3  ; (-len) & 3
    15  	WORD $0x5a89454a    // csneg	w10, w10, w9, mi  ; w10 = len % 4 as a signed remainder
    16  	WORD $0x4b0a0109    // sub	w9, w8, w10  ; w9 = len - len % 4, the element count handled by the SIMD loops
    17  	WORD $0x7100413f    // cmp	w9, #16
    18  	WORD $0x540000ea    // b.ge	.LBB0_2  ; at least 16 elements: run the 16-wide loop
    19  	WORD $0x6f00e400    // movi	v0.2d, #0000000000000000  ; fewer than 16: zero the four accumulators
    20  	WORD $0x2a1f03eb    // mov	w11, wzr  ; element index = 0
    21  	WORD $0x6f00e401    // movi	v1.2d, #0000000000000000
    22  	WORD $0x6f00e403    // movi	v3.2d, #0000000000000000
    23  	WORD $0x6f00e402    // movi	v2.2d, #0000000000000000
    24  	WORD $0x14000016    // b	.LBB0_4
    25  
    26  LBB0_2: // setup for the 16-wide loop: zero accumulators v0-v3, x11 = index, x12/x13 = cursors into a/b
    27  	WORD $0x6f00e402 // movi	v2.2d, #0000000000000000
    28  	WORD $0xaa1f03eb // mov	x11, xzr
    29  	WORD $0x6f00e403 // movi	v3.2d, #0000000000000000
    30  	WORD $0xaa0003ec // mov	x12, x0
    31  	WORD $0x6f00e401 // movi	v1.2d, #0000000000000000
    32  	WORD $0xaa0103ed // mov	x13, x1
    33  	WORD $0x6f00e400 // movi	v0.2d, #0000000000000000
    34  
    35  LBB0_3: // 16 floats per iteration, one 4-lane accumulator per loaded register (v2, v3, v1, v0)
    36  	WORD $0x4cdf2984 // ld1	{ v4.4s, v5.4s, v6.4s, v7.4s }, [x12], #64  ; 16 floats from a, cursor += 64 bytes
    37  	WORD $0x9100816e // add	x14, x11, #32  ; x14 = index + 32, compared against w9's count below
    38  	WORD $0x9100416b // add	x11, x11, #16  ; index += 16
    39  	WORD $0xeb0901df // cmp	x14, x9  ; flags are consumed by the b.ls at the end of the loop
    40  	WORD $0x4cdf29b0 // ld1	{ v16.4s, v17.4s, v18.4s, v19.4s }, [x13], #64  ; 16 floats from b, cursor += 64 bytes
    41  	WORD $0x6e30dc94 // fmul	v20.4s, v4.4s, v16.4s
    42  	WORD $0x6e31dcb5 // fmul	v21.4s, v5.4s, v17.4s
    43  	WORD $0x6e32dcd6 // fmul	v22.4s, v6.4s, v18.4s
    44  	WORD $0x6e33dce4 // fmul	v4.4s, v7.4s, v19.4s
    45  	WORD $0x4e34d442 // fadd	v2.4s, v2.4s, v20.4s
    46  	WORD $0x4e35d463 // fadd	v3.4s, v3.4s, v21.4s
    47  	WORD $0x4e36d421 // fadd	v1.4s, v1.4s, v22.4s
    48  	WORD $0x4e24d400 // fadd	v0.4s, v0.4s, v4.4s
    49  	WORD $0x54fffe69 // b.ls	.LBB0_3  ; repeat while another full group of 16 fits (unsigned compare)
    50  
    51  LBB0_4: // 4-wide loop over the elements left before index w9, if any
    52  	WORD $0x6b09017f // cmp	w11, w9
    53  	WORD $0x540001ca // b.ge	.LBB0_7  ; nothing left for the 4-wide loop
    54  	WORD $0x2a0b03eb // mov	w11, w11  ; zero-extend the index into x11
    55  	WORD $0x2a0903ec // mov	w12, w9
    56  	WORD $0xd37ef56e // lsl	x14, x11, #2  ; byte offset = index * 4
    57  	WORD $0x93407d8c // sxtw	x12, w12  ; x12 = loop bound (w9 sign-extended)
    58  	WORD $0x8b0e002d // add	x13, x1, x14  ; x13 = &b[index]
    59  	WORD $0x8b0e000e // add	x14, x0, x14  ; x14 = &a[index]
    60  
    61  LBB0_6: // 4 floats per iteration, accumulated into v2
    62  	WORD $0x3cc105c4 // ldr	q4, [x14], #16
    63  	WORD $0x3cc105a5 // ldr	q5, [x13], #16
    64  	WORD $0x9100116b // add	x11, x11, #4
    65  	WORD $0xeb0c017f // cmp	x11, x12
    66  	WORD $0x6e25dc84 // fmul	v4.4s, v4.4s, v5.4s
    67  	WORD $0x4e24d442 // fadd	v2.4s, v2.4s, v4.4s
    68  	WORD $0x54ffff4b // b.lt	.LBB0_6
    69  
    70  LBB0_7: // horizontal reduction of the four vector accumulators into scalar s0, then the scalar tail
    71  	WORD $0x6e22d442 // faddp	v2.4s, v2.4s, v2.4s
    72  	WORD $0x7100055f // cmp	w10, #1  ; remainder test; flags survive the FP adds until the b.lt below
    73  	WORD $0x6e23d463 // faddp	v3.4s, v3.4s, v3.4s
    74  	WORD $0x6e21d421 // faddp	v1.4s, v1.4s, v1.4s
    75  	WORD $0x6e20d400 // faddp	v0.4s, v0.4s, v0.4s
    76  	WORD $0x7e30d842 // faddp	s2, v2.2s
    77  	WORD $0x7e30d863 // faddp	s3, v3.2s
    78  	WORD $0x7e30d821 // faddp	s1, v1.2s
    79  	WORD $0x7e30d800 // faddp	s0, v0.2s
    80  	WORD $0x1e232842 // fadd	s2, s2, s3
    81  	WORD $0x1e212841 // fadd	s1, s2, s1
    82  	WORD $0x1e202820 // fadd	s0, s1, s0  ; s0 = running total of all SIMD lanes
    83  	WORD $0x540005eb // b.lt	.LBB0_13  ; remainder < 1: no tail elements, store and return
    84  	WORD $0x93407d08 // sxtw	x8, w8  ; x8 = len
    85  	WORD $0x93407d29 // sxtw	x9, w9  ; x9 = index of the first tail element
    86  	WORD $0x9100052a // add	x10, x9, #1
    87  	WORD $0xeb08015f // cmp	x10, x8
    88  	WORD $0x9a89d50a // csinc	x10, x8, x9, le  ; x10 = (x9 + 1 <= len) ? len : x9 + 1
    89  	WORD $0xcb09014a // sub	x10, x10, x9  ; x10 = number of tail elements
    90  	WORD $0xf100215f // cmp	x10, #8
    91  	WORD $0x54000403 // b.lo	.LBB0_12  ; fewer than 8 tail elements: scalar loop only
    92  	WORD $0xd37ef52c // lsl	x12, x9, #2  ; byte offset of the first tail element
    93  	WORD $0x927df14b // and	x11, x10, #0xfffffffffffffff8  ; x11 = tail count rounded down to a multiple of 8
    94  	WORD $0x9100418d // add	x13, x12, #16  ; cursors are biased by +16 bytes for the ldur #-16 loads below
    95  	WORD $0x8b090169 // add	x9, x11, x9  ; x9 = index where the scalar loop would resume
    96  	WORD $0x8b0d000c // add	x12, x0, x13
    97  	WORD $0x8b0d002d // add	x13, x1, x13
    98  	WORD $0xaa0b03ee // mov	x14, x11  ; x14 = elements left for this loop
    99  
   100  LBB0_10: // 8 tail elements per iteration; each product lane is added to s0 one at a time. NOTE(review): the tail count is len % 4, so this path looks unreachable -- confirm
   101  	WORD $0x3cdf0181 // ldur	q1, [x12, #-16]  ; first 4 floats of a
   102  	WORD $0xf10021ce // subs	x14, x14, #8  ; flags are consumed by the b.ne at the end of the loop
   103  	WORD $0x3cdf01a2 // ldur	q2, [x13, #-16]  ; first 4 floats of b
   104  	WORD $0x6e22dc21 // fmul	v1.4s, v1.4s, v2.4s
   105  	WORD $0x5e0c0422 // mov	s2, v1.s[1]
   106  	WORD $0x1e212800 // fadd	s0, s0, s1
   107  	WORD $0x5e140423 // mov	s3, v1.s[2]
   108  	WORD $0x5e1c0421 // mov	s1, v1.s[3]
   109  	WORD $0x1e222800 // fadd	s0, s0, s2
   110  	WORD $0x3cc20582 // ldr	q2, [x12], #32  ; next 4 floats of a, cursor += 32 bytes
   111  	WORD $0x1e232800 // fadd	s0, s0, s3
   112  	WORD $0x3cc205a3 // ldr	q3, [x13], #32  ; next 4 floats of b, cursor += 32 bytes
   113  	WORD $0x6e23dc42 // fmul	v2.4s, v2.4s, v3.4s
   114  	WORD $0x1e212800 // fadd	s0, s0, s1
   115  	WORD $0x5e0c0441 // mov	s1, v2.s[1]
   116  	WORD $0x1e222800 // fadd	s0, s0, s2
   117  	WORD $0x5e140443 // mov	s3, v2.s[2]
   118  	WORD $0x1e212800 // fadd	s0, s0, s1
   119  	WORD $0x5e1c0441 // mov	s1, v2.s[3]
   120  	WORD $0x1e232800 // fadd	s0, s0, s3
   121  	WORD $0x1e212800 // fadd	s0, s0, s1
   122  	WORD $0x54fffd61 // b.ne	.LBB0_10
   123  	WORD $0xeb0b015f // cmp	x10, x11
   124  	WORD $0x54000100 // b.eq	.LBB0_13  ; tail count was a multiple of 8: nothing left
   125  
   126  LBB0_12: // scalar tail: s0 += a[x9] * b[x9] for x9 up to len
   127  	WORD $0xd37ef52a // lsl	x10, x9, #2  ; byte offset = index * 4
   128  	WORD $0x91000529 // add	x9, x9, #1
   129  	WORD $0xeb08013f // cmp	x9, x8
   130  	WORD $0xbc6a6801 // ldr	s1, [x0, x10]
   131  	WORD $0xbc6a6822 // ldr	s2, [x1, x10]
   132  	WORD $0x1f020020 // fmadd	s0, s1, s2, s0
   133  	WORD $0x54ffff4b // b.lt	.LBB0_12
   134  
   135  LBB0_13: // store the result and return
   136  	WORD $0xbd000040 // str	s0, [x2]  ; *res = s0
   137  	WORD $0xa8c17bfd // ldp	x29, x30, [sp], #16  ; restore frame pointer and link register
   138  	WORD $0xd65f03c0 // ret