github.com/afumu/libc@v0.0.6/musl/src/string/aarch64/memset.S (about)

     1  /*
     2   * memset - fill memory with a constant byte
     3   *
     4   * Copyright (c) 2012-2020, Arm Limited.
     5   * SPDX-License-Identifier: MIT
     6   */
     7  
     8  /* Assumptions:
     9   *
    10   * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
    11   *
    12   */
    13  
    14  #define dstin   x0	/* Destination pointer on entry; used only as a base address.  */
    15  #define val     x1	/* Fill value; only its low byte is replicated.  */
    16  #define valw    w1	/* 32-bit view of val.  */
    17  #define count   x2	/* Number of bytes to set; reused as a loop counter for long fills.  */
    18  #define dst     x3	/* Aligned working pointer for fills longer than 96 bytes.  */
    19  #define dstend  x4	/* dstin + count, i.e. one past the last byte to set.  */
    20  #define zva_val x5	/* Scratch for the value read from DCZID_EL0.  */
    21  
    22  .global memset
    23  .type memset,%function
        /* memset: write the low byte of val to the count bytes starting at dstin.
         *
         * In:      dstin = x0, val = x1 (read through w1), count = x2.
         * Out:     x0 is never written below, so it still holds dstin at each ret.
         * Changes: x1-x5 (val, count, dst, dstend, zva_val), v0 and the flags,
         *          depending on the path taken.
         *
         * Each size class stores from both ends of the buffer (dstin and dstend)
         * using stores that may overlap, so no byte-wise tail loop is needed.
         */
    24  memset:
    25  
    26  	dup     v0.16B, valw		/* v0 = fill byte in all 16 byte lanes.  */
    27  	add     dstend, dstin, count	/* dstend = one past the last byte.  */
    28  
    29  	cmp     count, 96
    30  	b.hi    .Lset_long		/* count > 96.  */
    31  	cmp     count, 16
    32  	b.hs    .Lset_medium		/* 16 <= count <= 96.  */
    33  	mov     val, v0.D[0]		/* val = fill byte repeated 8 times.  */
    34  
    35  	/* Set 0..15 bytes.  */
    36  	tbz     count, 3, 1f		/* Bit 3 clear: count < 8.  */
    37  	str     val, [dstin]		/* 8..15: first 8 bytes ...  */
    38  	str     val, [dstend, -8]	/* ... and last 8 bytes (the stores overlap).  */
    39  	ret
    40  	nop				/* Follows a ret and has no label, so it is not reached.
					   NOTE(review): presumably alignment padding - confirm.  */
    41  1:      tbz     count, 2, 2f		/* Bit 2 clear: count < 4.  */
    42  	str     valw, [dstin]		/* 4..7: first 4 bytes ...  */
    43  	str     valw, [dstend, -4]	/* ... and last 4 bytes (the stores overlap).  */
    44  	ret
    45  2:      cbz     count, 3f		/* count == 0: store nothing.  */
    46  	strb    valw, [dstin]		/* 1..3: first byte.  */
    47  	tbz     count, 1, 3f		/* Bit 1 clear: count == 1, done.  */
    48  	strh    valw, [dstend, -2]	/* 2..3: last two bytes.  */
    49  3:      ret
    50  
    51  	/* Set 16..96 bytes (count == 16 also lands here via b.hs).  */
    52  .Lset_medium:
    53  	str     q0, [dstin]		/* First 16 bytes.  */
    54  	tbnz    count, 6, .Lset96	/* Bit 6 set: count is 64..96.  */
    55  	str     q0, [dstend, -16]	/* Last 16 bytes.  */
    56  	tbz     count, 5, 1f		/* Bit 5 clear: count is 16..31, done.  */
    57  	str     q0, [dstin, 16]		/* 32..63: bytes 16..31 from the start ...  */
    58  	str     q0, [dstend, -32]	/* ... and the 16 bytes before the last 16.  */
    59  1:      ret
    60  
    61  	.p2align 4
    62  	/* Set 64..96 bytes.  Write 64 bytes from the start and
    63  	   32 bytes from the end.  */
    64  .Lset96:
    65  	str     q0, [dstin, 16]		/* Bytes 16..31; bytes 0..15 were stored at .Lset_medium.  */
    66  	stp     q0, q0, [dstin, 32]	/* Bytes 32..63.  */
    67  	stp     q0, q0, [dstend, -32]	/* Last 32 bytes.  */
    68  	ret
    69  
    70  	.p2align 4
        /* Set more than 96 bytes.  Store 16 bytes at dstin, then continue from
           the 16-byte-aligned address below it in 64-byte steps, and finish
           with 64 bytes stored relative to dstend.  */
    71  .Lset_long:
    72  	and     valw, valw, 255		/* Keep only the fill byte for the zero test below.  */
    73  	bic     dst, dstin, 15		/* dst = dstin rounded down to a multiple of 16.  */
    74  	str     q0, [dstin]		/* First 16 bytes, stored at dstin itself.  */
    75  	cmp     count, 160
    76  	ccmp    valw, 0, 0, hs		/* count >= 160: compare the byte with 0; otherwise flags = 0 ("ne").  */
    77  	b.ne    .Lno_zva		/* DC ZVA is tried only for a zero fill of at least 160 bytes.  */
    78  
        /* Run-time check, compiled out when SKIP_ZVA_CHECK is defined: use the
           DC ZVA path only if the low five bits of DCZID_EL0 equal 4.
           NOTE(review): per the Arm ARM, bits [3:0] encode the block size and
           bit 4 is the "DC ZVA prohibited" flag - confirm against the manual.  */
    79  #ifndef SKIP_ZVA_CHECK
    80  	mrs     zva_val, dczid_el0
    81  	and     zva_val, zva_val, 31
    82  	cmp     zva_val, 4              /* ZVA size is 64 bytes.  */
    83  	b.ne    .Lno_zva
    84  #endif
    85  	str     q0, [dst, 16]		/* Together with the store at dstin these cover ...  */
    86  	stp     q0, q0, [dst, 32]	/* ... every byte from dstin up to dst + 64.  */
    87  	bic     dst, dst, 63		/* Round dst down to a multiple of 64.  */
    88  	sub     count, dstend, dst      /* Count is now 64 too large.  */
    89  	sub     count, count, 128       /* Adjust count and bias for loop.  */
    90  
    91  	.p2align 4
    92  .Lzva_loop:
    93  	add     dst, dst, 64		/* First block zeroed is at the rounded dst + 64.  */
    94  	dc      zva, dst		/* Zero the block containing dst.  */
    95  	subs    count, count, 64
    96  	b.hi    .Lzva_loop		/* Repeat while count was above 64 before the subtraction.  */
    97  	stp     q0, q0, [dstend, -64]	/* Last 64 bytes, always stored relative to dstend.  */
    98  	stp     q0, q0, [dstend, -32]
    99  	ret
   100  
        /* Generic long fill: reached with dst = dstin rounded down to 16 and the
           first 16 bytes at dstin already stored.  */
   101  .Lno_zva:
   102  	sub     count, dstend, dst      /* Count is 16 too large.  */
   103  	sub     dst, dst, 16            /* Dst is biased by -32.  */
   104  	sub     count, count, 64 + 16   /* Adjust count and bias for loop.  */
   105  .Lno_zva_loop:
   106  	stp     q0, q0, [dst, 32]	/* 32 bytes at dst + 32 ...  */
   107  	stp     q0, q0, [dst, 64]!	/* ... then dst += 64 (pre-index) and 32 bytes at the new dst.  */
   108  	subs    count, count, 64
   109  	b.hi    .Lno_zva_loop		/* Repeat while count was above 64 before the subtraction.  */
   110  	stp     q0, q0, [dstend, -64]	/* Last 64 bytes, always stored relative to dstend.  */
   111  	stp     q0, q0, [dstend, -32]
   112  	ret
   113  
   114  .size memset,.-memset
   115