// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Origin: github.com/lzhfromustc/gofuzz@v0.0.0-20211116160056-151b3108bbd1/runtime/memclr_arm64.s

#include "textflag.h"

// See memclrNoHeapPointers Go doc for important implementation constraints.

// func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr)
//
// Stores zero to the n bytes starting at ptr.
//
// Register roles inside this routine:
//	R0  current destination pointer
//	R1  bytes still to clear (biased downwards inside the loops)
//	R3  scratch
//	R4  bytes needed to reach the next alignment boundary
//	R5  DC ZVA block size in bytes
//	R6  R5 - 1, used as the block-size mask
//	R7  ptr + n, used only by the n < 16 paths
//	R9  scratch
//
// block_size<> caches the result of probing DCZID_EL0:
//	0               not probed yet
//	bit 31 set      DC ZVA must not be used (prohibited or block too small)
//	anything else   usable block size in bytes (a multiple of 64)
TEXT runtime·memclrNoHeapPointers(SB),NOSPLIT,$0-16
	MOVD	ptr+0(FP), R0
	MOVD	n+8(FP), R1

	CMP	$16, R1
	// If n is equal to 16 bytes, use zero_exact_16 to zero
	BEQ	zero_exact_16

	// If n is greater than 16 bytes, use zero_by_16 to zero
	BHI	zero_by_16

	// n is less than 16 bytes. R7 points one past the end so that the
	// second store of each pair can be addressed backwards from the end;
	// the two stores may overlap.
	ADD	R1, R0, R7
	TBZ	$3, R1, less_than_8
	// 8 <= n < 16
	MOVD	ZR, (R0)
	MOVD	ZR, -8(R7)
	RET

less_than_8:
	TBZ	$2, R1, less_than_4
	// 4 <= n < 8
	MOVW	ZR, (R0)
	MOVW	ZR, -4(R7)
	RET

less_than_4:
	CBZ	R1, ending
	// 1 <= n < 4: one byte at the start, plus the trailing halfword
	// when bit 1 of n is set (n is 2 or 3).
	MOVB	ZR, (R0)
	TBZ	$1, R1, ending
	MOVH	ZR, -2(R7)

ending:
	RET

zero_exact_16:
	// n is exactly 16 bytes
	STP	(ZR, ZR), (R0)
	RET

zero_by_16:
	// n greater than 16 bytes, check if the start address is aligned
	NEG	R0, R4
	ANDS	$15, R4, R4
	// Try zeroing using zva if the start address is aligned with 16
	BEQ	try_zva

	// Non-aligned store: clear the first 16 bytes in place ...
	STP	(ZR, ZR), (R0)
	// ... then make the destination aligned by skipping the R4 bytes
	// (1..15) that lead up to the next 16-byte boundary.
	SUB	R4, R1, R1
	ADD	R4, R0, R0
	B	try_zva

tail_maybe_long:
	// Remainder left over by loop_zva; it is below the block size but
	// can still be 64 bytes or more.
	CMP	$64, R1
	BHS	no_zva

tail63:
	// Only the low six bits of R1 are used from here on.
	// R3 = R1 & 48 selects the number of whole 16-byte stores:
	//	48 -> three stores (fall through)
	//	32 -> two stores   (enter at last48, despite its name)
	//	16 -> one store    (enter at last32, despite its name)
	//	 0 -> none
	ANDS	$48, R1, R3
	BEQ	last16
	CMPW	$32, R3
	BEQ	last48
	BLT	last32
	STP.P	(ZR, ZR), 16(R0)
last48:
	STP.P	(ZR, ZR), 16(R0)
last32:
	STP.P	(ZR, ZR), 16(R0)
	// The last store length is at most 16, so it is safe to use
	// stp to write last 16 bytes
last16:
	ANDS	$15, R1, R1
	CBZ	R1, last_end
	// Store the 16 bytes that end exactly at the end of the range;
	// this overlaps bytes that were already cleared.
	ADD	R1, R0, R0
	STP	(ZR, ZR), -16(R0)
last_end:
	RET

no_zva:
	// Plain 64-bytes-per-iteration loop. R0 is biased by -16 so the
	// pre-indexed STP.W below both stores the fourth pair and advances
	// R0 by 64. R1 is biased by -64 so the SUBS result tells whether
	// another full 64 bytes remain.
	SUB	$16, R0, R0
	SUB	$64, R1, R1

loop_64:
	STP	(ZR, ZR), 16(R0)
	STP	(ZR, ZR), 32(R0)
	STP	(ZR, ZR), 48(R0)
	STP.W	(ZR, ZR), 64(R0)
	SUBS	$64, R1, R1
	BGE	loop_64
	// Test whether a remainder of 1..63 bytes is left. The ADD that
	// removes the bias from R0 does not touch the flags BNE consumes.
	ANDS	$63, R1, ZR
	ADD	$16, R0, R0
	BNE	tail63
	RET

try_zva:
	// Try using the ZVA feature to zero entire cache lines
	// It is not meaningful to use ZVA if the block size is less than 64,
	// so make sure that n is greater than or equal to 64
	CMP	$63, R1
	BLE	tail63

	CMP	$128, R1
	// Ensure n is at least 128 bytes, so that there is enough left to
	// zero after alignment.
	BLT	no_zva
	// Check if ZVA is allowed from user code, and if so get the block size
	MOVW	block_size<>(SB), R5
	// Bit 31 set: an earlier probe decided DC ZVA must not be used.
	TBNZ	$31, R5, no_zva
	// Non-zero: an earlier probe cached a usable block size.
	CBNZ	R5, zero_by_line
	// DCZID_EL0 bit assignments
	// [63:5] Reserved
	// [4]    DZP, if bit set DC ZVA instruction is prohibited, else permitted
	// [3:0]  log2 of the block size in words, eg. if it returns 0x4 then block size is 16 words
	MRS	DCZID_EL0, R3
	TBZ	$4, R3, init

zva_unusable:
	// ZVA is prohibited, or its block size is below 64 bytes. Cache that
	// verdict so that every later call goes straight to no_zva.
	MOVW	$~0, R5
	MOVW	R5, block_size<>(SB)
	B	no_zva

init:
	// R5 = 4 << DCZID_EL0[3:0] = block size in bytes.
	MOVW	$4, R9
	ANDW	$15, R3, R5
	LSLW	R5, R9, R5

	// Block size is less than 64. This must be decided before the value
	// is cached: caching a small block size would let later calls reach
	// zero_by_line through the CBNZ above without passing this check.
	ANDS	$63, R5, R9
	BNE	zva_unusable
	MOVW	R5, block_size<>(SB)

zero_by_line:
	CMP	R5, R1
	// Not enough memory to reach alignment
	BLO	no_zva
	SUB	$1, R5, R6
	NEG	R0, R4
	ANDS	R6, R4, R4
	// Already aligned
	BEQ	aligned

	// R3 = bytes that would remain once R0 is block aligned
	SUB	R4, R1, R3

	// Check that the remaining length to ZVA after alignment is at
	// least 64 bytes and at least one whole block. If R3 >= 64 the CCMP
	// compares R3 with R5; otherwise it forces NZCV=0b1010, which reads
	// as "less than" for the BLT below.
	CMP	$64, R3
	CCMP	GE, R3, R5, $10  // condition code GE, NZCV=0b1010
	BLT	no_zva

	// We now have at least 64 bytes to zero, update n
	MOVD	R3, R1

loop_zva_prolog:
	// Clear 64 bytes per iteration until the alignment distance R4 goes
	// negative. The flags set by SUBS survive the STP and ADD that
	// follow it and are consumed by BGE.
	STP	(ZR, ZR), (R0)
	STP	(ZR, ZR), 16(R0)
	STP	(ZR, ZR), 32(R0)
	SUBS	$64, R4, R4
	STP	(ZR, ZR), 48(R0)
	ADD	$64, R0, R0
	BGE	loop_zva_prolog

	// R4 is negative here: step R0 back onto the block boundary.
	ADD	R4, R0, R0

aligned:
	// Bias R1 by one block so the SUBS carry tells whether another whole
	// block remains.
	SUB	R5, R1, R1

loop_zva:
	WORD	$0xd50b7420 // DC ZVA, R0
	ADD	R5, R0, R0
	SUBS	R5, R1, R1
	BHS	loop_zva
	// R1 & (block size - 1) is the number of bytes left after the last
	// whole block.
	ANDS	R6, R1, R1
	BNE	tail_maybe_long
	RET

GLOBL block_size<>(SB), NOPTR, $8