github.com/ice-blockchain/go/src@v0.0.0-20240403114104-1564d284e521/runtime/memmove_riscv64.s

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// See memmove Go doc for important implementation constraints.

// void runtime·memmove(void*, void*, uintptr)
TEXT runtime·memmove<ABIInternal>(SB),NOSPLIT,$-0-24
	// X10 = to
	// X11 = from
	// X12 = n
	BEQ	X10, X11, done
	BEQZ	X12, done

	// If the destination is ahead of the source, start at the end of the
	// buffer and go backward.
	BGTU	X10, X11, backward

	// If less than 8 bytes, do single byte copies.
	MOV	$8, X9
	BLT	X12, X9, f_loop4_check

	// Check alignment - if alignment differs we have to do one byte at a time.
	AND	$7, X10, X5
	AND	$7, X11, X6
	BNE	X5, X6, f_loop8_unaligned_check
	BEQZ	X5, f_loop_check

	// Move one byte at a time until we reach 8 byte alignment.
	SUB	X5, X9, X5
	SUB	X5, X12, X12
f_align:
	SUB	$1, X5
	MOVB	0(X11), X14
	MOVB	X14, 0(X10)
	ADD	$1, X10
	ADD	$1, X11
	BNEZ	X5, f_align

f_loop_check:
	MOV	$16, X9
	BLT	X12, X9, f_loop8_check
	MOV	$32, X9
	BLT	X12, X9, f_loop16_check
	MOV	$64, X9
	BLT	X12, X9, f_loop32_check
f_loop64:
	MOV	0(X11), X14
	MOV	8(X11), X15
	MOV	16(X11), X16
	MOV	24(X11), X17
	MOV	32(X11), X18
	MOV	40(X11), X19
	MOV	48(X11), X20
	MOV	56(X11), X21
	MOV	X14, 0(X10)
	MOV	X15, 8(X10)
	MOV	X16, 16(X10)
	MOV	X17, 24(X10)
	MOV	X18, 32(X10)
	MOV	X19, 40(X10)
	MOV	X20, 48(X10)
	MOV	X21, 56(X10)
	ADD	$64, X10
	ADD	$64, X11
	SUB	$64, X12
	BGE	X12, X9, f_loop64
	BEQZ	X12, done

f_loop32_check:
	MOV	$32, X9
	BLT	X12, X9, f_loop16_check
f_loop32:
	MOV	0(X11), X14
	MOV	8(X11), X15
	MOV	16(X11), X16
	MOV	24(X11), X17
	MOV	X14, 0(X10)
	MOV	X15, 8(X10)
	MOV	X16, 16(X10)
	MOV	X17, 24(X10)
	ADD	$32, X10
	ADD	$32, X11
	SUB	$32, X12
	BGE	X12, X9, f_loop32
	BEQZ	X12, done

f_loop16_check:
	MOV	$16, X9
	BLT	X12, X9, f_loop8_check
f_loop16:
	MOV	0(X11), X14
	MOV	8(X11), X15
	MOV	X14, 0(X10)
	MOV	X15, 8(X10)
	ADD	$16, X10
	ADD	$16, X11
	SUB	$16, X12
	BGE	X12, X9, f_loop16
	BEQZ	X12, done

f_loop8_check:
	MOV	$8, X9
	BLT	X12, X9, f_loop4_check
f_loop8:
	MOV	0(X11), X14
	MOV	X14, 0(X10)
	ADD	$8, X10
	ADD	$8, X11
	SUB	$8, X12
	BGE	X12, X9, f_loop8
	BEQZ	X12, done
	JMP	f_loop4_check

f_loop8_unaligned_check:
	MOV	$8, X9
	BLT	X12, X9, f_loop4_check
f_loop8_unaligned:
	MOVB	0(X11), X14
	MOVB	1(X11), X15
	MOVB	2(X11), X16
	MOVB	3(X11), X17
	MOVB	4(X11), X18
	MOVB	5(X11), X19
	MOVB	6(X11), X20
	MOVB	7(X11), X21
	MOVB	X14, 0(X10)
	MOVB	X15, 1(X10)
	MOVB	X16, 2(X10)
	MOVB	X17, 3(X10)
	MOVB	X18, 4(X10)
	MOVB	X19, 5(X10)
	MOVB	X20, 6(X10)
	MOVB	X21, 7(X10)
	ADD	$8, X10
	ADD	$8, X11
	SUB	$8, X12
	BGE	X12, X9, f_loop8_unaligned

f_loop4_check:
	MOV	$4, X9
	BLT	X12, X9, f_loop1
f_loop4:
	MOVB	0(X11), X14
	MOVB	1(X11), X15
	MOVB	2(X11), X16
	MOVB	3(X11), X17
	MOVB	X14, 0(X10)
	MOVB	X15, 1(X10)
	MOVB	X16, 2(X10)
	MOVB	X17, 3(X10)
	ADD	$4, X10
	ADD	$4, X11
	SUB	$4, X12
	BGE	X12, X9, f_loop4

f_loop1:
	BEQZ	X12, done
	MOVB	0(X11), X14
	MOVB	X14, 0(X10)
	ADD	$1, X10
	ADD	$1, X11
	SUB	$1, X12
	JMP	f_loop1

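	// The destination is at a higher address than the source, so the
	// buffers may overlap from above. Copy from the high end of the
	// buffers toward the low end so that each source byte is read
	// before the copy can overwrite it.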
backward:
	ADD	X10, X12, X10
	ADD	X11, X12, X11

	// If less than 8 bytes, do single byte copies.
	MOV	$8, X9
	BLT	X12, X9, b_loop4_check

	// Check alignment - if alignment differs we have to do one byte at a time.
	AND	$7, X10, X5
	AND	$7, X11, X6
	BNE	X5, X6, b_loop8_unaligned_check
	BEQZ	X5, b_loop_check

	// Move one byte at a time until we reach 8 byte alignment.
	SUB	X5, X12, X12
b_align:
	SUB	$1, X5
	SUB	$1, X10
	SUB	$1, X11
	MOVB	0(X11), X14
	MOVB	X14, 0(X10)
	BNEZ	X5, b_align

b_loop_check:
	MOV	$16, X9
	BLT	X12, X9, b_loop8_check
	MOV	$32, X9
	BLT	X12, X9, b_loop16_check
	MOV	$64, X9
	BLT	X12, X9, b_loop32_check
b_loop64:
	SUB	$64, X10
	SUB	$64, X11
	MOV	0(X11), X14
	MOV	8(X11), X15
	MOV	16(X11), X16
	MOV	24(X11), X17
	MOV	32(X11), X18
	MOV	40(X11), X19
	MOV	48(X11), X20
	MOV	56(X11), X21
	MOV	X14, 0(X10)
	MOV	X15, 8(X10)
	MOV	X16, 16(X10)
	MOV	X17, 24(X10)
	MOV	X18, 32(X10)
	MOV	X19, 40(X10)
	MOV	X20, 48(X10)
	MOV	X21, 56(X10)
	SUB	$64, X12
	BGE	X12, X9, b_loop64
	BEQZ	X12, done

b_loop32_check:
	MOV	$32, X9
	BLT	X12, X9, b_loop16_check
b_loop32:
	SUB	$32, X10
	SUB	$32, X11
	MOV	0(X11), X14
	MOV	8(X11), X15
	MOV	16(X11), X16
	MOV	24(X11), X17
	MOV	X14, 0(X10)
	MOV	X15, 8(X10)
	MOV	X16, 16(X10)
	MOV	X17, 24(X10)
	SUB	$32, X12
	BGE	X12, X9, b_loop32
	BEQZ	X12, done

b_loop16_check:
	MOV	$16, X9
	BLT	X12, X9, b_loop8_check
b_loop16:
	SUB	$16, X10
	SUB	$16, X11
	MOV	0(X11), X14
	MOV	8(X11), X15
	MOV	X14, 0(X10)
	MOV	X15, 8(X10)
	SUB	$16, X12
	BGE	X12, X9, b_loop16
	BEQZ	X12, done

b_loop8_check:
	MOV	$8, X9
	BLT	X12, X9, b_loop4_check
b_loop8:
	SUB	$8, X10
	SUB	$8, X11
	MOV	0(X11), X14
	MOV	X14, 0(X10)
	SUB	$8, X12
	BGE	X12, X9, b_loop8
	BEQZ	X12, done
	JMP	b_loop4_check

b_loop8_unaligned_check:
	MOV	$8, X9
	BLT	X12, X9, b_loop4_check
b_loop8_unaligned:
	SUB	$8, X10
	SUB	$8, X11
	MOVB	0(X11), X14
	MOVB	1(X11), X15
	MOVB	2(X11), X16
	MOVB	3(X11), X17
	MOVB	4(X11), X18
	MOVB	5(X11), X19
	MOVB	6(X11), X20
	MOVB	7(X11), X21
	MOVB	X14, 0(X10)
	MOVB	X15, 1(X10)
	MOVB	X16, 2(X10)
	MOVB	X17, 3(X10)
	MOVB	X18, 4(X10)
	MOVB	X19, 5(X10)
	MOVB	X20, 6(X10)
	MOVB	X21, 7(X10)
	SUB	$8, X12
	BGE	X12, X9, b_loop8_unaligned

b_loop4_check:
	MOV	$4, X9
	BLT	X12, X9, b_loop1
b_loop4:
	SUB	$4, X10
	SUB	$4, X11
	MOVB	0(X11), X14
	MOVB	1(X11), X15
	MOVB	2(X11), X16
	MOVB	3(X11), X17
	MOVB	X14, 0(X10)
	MOVB	X15, 1(X10)
	MOVB	X16, 2(X10)
	MOVB	X17, 3(X10)
	SUB	$4, X12
	BGE	X12, X9, b_loop4

b_loop1:
	BEQZ	X12, done
	SUB	$1, X10
	SUB	$1, X11
	MOVB	0(X11), X14
	MOVB	X14, 0(X10)
	SUB	$1, X12
	JMP	b_loop1

done:
	RET