// Origin: github.com/comwrg/go/src@v0.0.0-20220319063731-c238d0440370/runtime/memclr_arm64.s

// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// See memclrNoHeapPointers Go doc for important implementation constraints.

// func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr)
// Also called from assembly in sys_windows_arm64.s without g (but using Go stack convention).
//
// Writes n zero bytes starting at ptr.
//
// Register usage inside this routine:
//	R0  current destination address
//	R1  remaining byte count (may hold a biased/negative value inside loops)
//	R3  scratch (tail size bits, DCZID_EL0 value, length after alignment)
//	R4  number of bytes needed to reach the next alignment boundary
//	R5  cached value of block_size<> (see the GLOBL at the end of the file)
//	R6  R5 - 1, used as an alignment mask
//	R7  ptr + n, one past the end, used by the n < 16 path
//	R9  scratch
TEXT runtime·memclrNoHeapPointers(SB),NOSPLIT,$0-16
	MOVD	n+8(FP), R1
	MOVD	ptr+0(FP), R0

	// Three-way split on n versus 16 (unsigned compare).
	CMP	$16, R1
	BHI	over_16
	BEQ	exactly_16

	// n < 16: use a pair of possibly overlapping stores, one anchored at
	// the start and one anchored at the end (R7 = ptr + n).
	ADD	R1, R0, R7
	TBZ	$3, R1, under_8
	// Bit 3 set: 8 <= n < 16, two 8-byte stores.
	MOVD	ZR, (R0)
	MOVD	ZR, -8(R7)
	RET

under_8:
	TBZ	$2, R1, under_4
	// Bit 2 set: 4 <= n < 8, two 4-byte stores.
	MOVW	ZR, (R0)
	MOVW	ZR, -4(R7)
	RET

under_4:
	// n is 0, 1, 2 or 3.
	CBZ	R1, done_small
	MOVB	ZR, (R0)
	// Bit 1 clear means n == 1 and the byte store above was enough.
	TBZ	$1, R1, done_small
	// n is 2 or 3: a halfword ending at ptr+n covers the rest.
	MOVH	ZR, -2(R7)

done_small:
	RET

exactly_16:
	// n == 16: a single store pair does the whole job.
	STP	(ZR, ZR), (R0)
	RET

over_16:
	// n > 16. R4 = (-ptr) & 15 is the distance to the next 16-byte boundary.
	NEG	R0, R4
	ANDS	$15, R4, R4
	// Zero distance: the start is already 16-byte aligned.
	BEQ	pick_strategy

	// Unaligned start: zero the first 16 bytes with one store pair, then
	// step the pointer up to the boundary and shrink the count to match.
	STP	(ZR, ZR), (R0)
	ADD	R4, R0, R0
	SUB	R4, R1, R1
	B	pick_strategy

zva_tail:
	// Leftover after the DC ZVA loop. 64 bytes or more goes back through
	// the store-pair loop; anything shorter falls into the tail below.
	CMP	$64, R1
	BHS	stp_path

tail_lt64:
	// Fewer than 64 bytes remain (only the low six bits of R1 are used).
	// R3 = R1 & 48 selects how many whole 16-byte chunks to store:
	// 48 -> three, 32 -> two, 16 -> one, 0 -> none.
	ANDS	$48, R1, R3
	BEQ	tail_partial
	CMPW	$32, R3
	BLT	tail_one
	BEQ	tail_two
	STP.P	(ZR, ZR), 16(R0)
tail_two:
	STP.P	(ZR, ZR), 16(R0)
tail_one:
	STP.P	(ZR, ZR), 16(R0)
	// At most 15 bytes are left now, so one store pair that ends exactly
	// at the end of the buffer finishes the job.
tail_partial:
	ANDS	$15, R1, R1
	CBZ	R1, tail_done
	ADD	R1, R0, R0
	STP	(ZR, ZR), -16(R0)
tail_done:
	RET

stp_path:
	// Plain store-pair loop, 64 bytes per iteration. R1 is biased by -64
	// so the loop can test the sign of the count, and R0 is biased by -16
	// so the final pre-indexed store (STP.W) advances it by 64.
	SUB	$64, R1, R1
	SUB	$16, R0, R0

stp_loop:
	STP	(ZR, ZR), 16(R0)
	STP	(ZR, ZR), 32(R0)
	STP	(ZR, ZR), 48(R0)
	STP.W	(ZR, ZR), 64(R0)
	SUBS	$64, R1, R1
	BGE	stp_loop
	// Undo the -16 bias on R0, then check whether any bytes (count mod 64)
	// are still pending. ADD leaves the flags alone, so BNE sees ANDS.
	ADD	$16, R0, R0
	ANDS	$63, R1, ZR
	BNE	tail_lt64
	RET

pick_strategy:
	// R0 is 16-byte aligned here. Choose between the tail code, the
	// store-pair loop, and the DC ZVA loop.
	// Below 64 bytes only the tail code is needed.
	CMP	$63, R1
	BLE	tail_lt64

	// Below 128 bytes DC ZVA is not attempted: there would not be enough
	// left to zero once the pointer has been aligned to the block size.
	CMP	$128, R1
	BLT	stp_path
	// Consult the cached probe result in block_size<>:
	//	bit 31 set -> DC ZVA is prohibited, use the store-pair loop
	//	non-zero   -> block size in bytes, already probed
	//	zero       -> not probed yet, read DCZID_EL0 now
	MOVW	block_size<>(SB), R5
	TBNZ	$31, R5, stp_path
	CBNZ	R5, zva_path
	// DCZID_EL0 bit assignments
	// [63:5] Reserved
	// [4]    DZP, if bit set DC ZVA instruction is prohibited, else permitted
	// [3:0]  log2 of the block size in words, eg. if it returns 0x4 then block size is 16 words
	MRS	DCZID_EL0, R3
	TBZ	$4, R3, probe_ok
	// DZP is set: remember that ZVA is unavailable by caching all-ones.
	MOVW	$~0, R5
	MOVW	R5, block_size<>(SB)
	B	stp_path

probe_ok:
	// Block size in bytes = 4 << DCZID_EL0[3:0]; cache it for later calls.
	ANDW	$15, R3, R5
	MOVW	$4, R9
	LSLW	R5, R9, R5
	MOVW	R5, block_size<>(SB)

	// A block size that is not a multiple of 64 (i.e. smaller than 64)
	// takes the store-pair loop on this call.
	ANDS	$63, R5, R9
	BNE	stp_path

zva_path:
	// Fewer bytes than one block: DC ZVA cannot be used.
	CMP	R5, R1
	BLO	stp_path
	// R6 = block size - 1 (alignment mask).
	// R4 = (-R0) & R6 = bytes needed to reach block alignment.
	NEG	R0, R4
	SUB	$1, R5, R6
	ANDS	R6, R4, R4
	// Nothing to do before the ZVA loop if already block-aligned.
	BEQ	zva_aligned

	// R3 = bytes that would remain once the pointer is block-aligned.
	SUB	R4, R1, R3

	// Require R3 >= 64 and R3 >= block size. If R3 >= 64 the CCMP compares
	// R3 with R5; otherwise it forces NZCV=0b1010 so that BLT is taken.
	CMP	$64, R3
	CCMP	GE, R3, R5, $10  // condition code GE, NZCV=0b1010
	BLT	stp_path

	// Enough room: the count now refers to the bytes after alignment.
	MOVD	R3, R1

zva_head:
	// Zero up to the block boundary in 64-byte steps. The last step may
	// run past the boundary; the R3 >= 64 check above keeps that
	// overshoot inside the buffer.
	STP	(ZR, ZR), (R0)
	STP	(ZR, ZR), 16(R0)
	STP	(ZR, ZR), 32(R0)
	STP	(ZR, ZR), 48(R0)
	ADD	$64, R0, R0
	SUBS	$64, R4, R4
	BGE	zva_head

	// R4 is negative by the amount of the overshoot; adding it pulls R0
	// back to the block-aligned address.
	ADD	R4, R0, R0

zva_aligned:
	// Bias the count by one block so the loop can use an unsigned
	// borrow test (BHS) after each subtraction.
	SUB	R5, R1, R1

zva_loop:
	WORD	$0xd50b7420 // DC ZVA, R0
	ADD	R5, R0, R0
	SUBS	R5, R1, R1
	BHS	zva_loop
	// R1 & (block size - 1) is the number of bytes still to be zeroed.
	ANDS	R6, R1, R1
	BNE	zva_tail
	RET

// block_size<> caches the DCZID_EL0 probe performed above: 0 until the first
// probe, all-ones (bit 31 set) when DC ZVA is prohibited, otherwise the ZVA
// block size in bytes. It is read and written with 32-bit MOVW accesses.
GLOBL block_size<>(SB), NOPTR, $8