github.com/twelsh-aw/go/src@v0.0.0-20230516233729-a56fe86a7c81/runtime/memmove_riscv64.s

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// See memmove Go doc for important implementation constraints.

// void runtime·memmove(void*, void*, uintptr)
TEXT runtime·memmove<ABIInternal>(SB),NOSPLIT,$-0-24
	// X10 = to
	// X11 = from
	// X12 = n
	BEQ	X10, X11, done
	BEQZ	X12, done

	// If the destination is ahead of the source, start at the end of the
	// buffer and go backward.
	BGTU	X10, X11, backward

	// If less than 8 bytes, do single byte copies.
	MOV	$8, X9
	BLT	X12, X9, f_loop4_check

	// Check alignment - if alignment differs we have to do one byte at a time.
	AND	$3, X10, X5
	AND	$3, X11, X6
	BNE	X5, X6, f_loop8_unaligned_check
	BEQZ	X5, f_loop_check

	// Move one byte at a time until we reach 8 byte alignment.
	SUB	X5, X12, X12
f_align:
	ADD	$-1, X5
	MOVB	0(X11), X14
	MOVB	X14, 0(X10)
	ADD	$1, X10
	ADD	$1, X11
	BNEZ	X5, f_align

f_loop_check:
	MOV	$16, X9
	BLT	X12, X9, f_loop8_check
	MOV	$32, X9
	BLT	X12, X9, f_loop16_check
	MOV	$64, X9
	BLT	X12, X9, f_loop32_check
f_loop64:
	MOV	0(X11), X14
	MOV	8(X11), X15
	MOV	16(X11), X16
	MOV	24(X11), X17
	MOV	32(X11), X18
	MOV	40(X11), X19
	MOV	48(X11), X20
	MOV	56(X11), X21
	MOV	X14, 0(X10)
	MOV	X15, 8(X10)
	MOV	X16, 16(X10)
	MOV	X17, 24(X10)
	MOV	X18, 32(X10)
	MOV	X19, 40(X10)
	MOV	X20, 48(X10)
	MOV	X21, 56(X10)
	ADD	$64, X10
	ADD	$64, X11
	ADD	$-64, X12
	BGE	X12, X9, f_loop64
	BEQZ	X12, done

f_loop32_check:
	MOV	$32, X9
	BLT	X12, X9, f_loop16_check
f_loop32:
	MOV	0(X11), X14
	MOV	8(X11), X15
	MOV	16(X11), X16
	MOV	24(X11), X17
	MOV	X14, 0(X10)
	MOV	X15, 8(X10)
	MOV	X16, 16(X10)
	MOV	X17, 24(X10)
	ADD	$32, X10
	ADD	$32, X11
	ADD	$-32, X12
	BGE	X12, X9, f_loop32
	BEQZ	X12, done

f_loop16_check:
	MOV	$16, X9
	BLT	X12, X9, f_loop8_check
f_loop16:
	MOV	0(X11), X14
	MOV	8(X11), X15
	MOV	X14, 0(X10)
	MOV	X15, 8(X10)
	ADD	$16, X10
	ADD	$16, X11
	ADD	$-16, X12
	BGE	X12, X9, f_loop16
	BEQZ	X12, done

f_loop8_check:
	MOV	$8, X9
	BLT	X12, X9, f_loop4_check
f_loop8:
	MOV	0(X11), X14
	MOV	X14, 0(X10)
	ADD	$8, X10
	ADD	$8, X11
	ADD	$-8, X12
	BGE	X12, X9, f_loop8
	BEQZ	X12, done
	JMP	f_loop4_check

f_loop8_unaligned_check:
	MOV	$8, X9
	BLT	X12, X9, f_loop4_check
f_loop8_unaligned:
	MOVB	0(X11), X14
	MOVB	1(X11), X15
	MOVB	2(X11), X16
	MOVB	3(X11), X17
	MOVB	4(X11), X18
	MOVB	5(X11), X19
	MOVB	6(X11), X20
	MOVB	7(X11), X21
	MOVB	X14, 0(X10)
	MOVB	X15, 1(X10)
	MOVB	X16, 2(X10)
	MOVB	X17, 3(X10)
	MOVB	X18, 4(X10)
	MOVB	X19, 5(X10)
	MOVB	X20, 6(X10)
	MOVB	X21, 7(X10)
	ADD	$8, X10
	ADD	$8, X11
	ADD	$-8, X12
	BGE	X12, X9, f_loop8_unaligned

f_loop4_check:
	MOV	$4, X9
	BLT	X12, X9, f_loop1
f_loop4:
	MOVB	0(X11), X14
	MOVB	1(X11), X15
	MOVB	2(X11), X16
	MOVB	3(X11), X17
	MOVB	X14, 0(X10)
	MOVB	X15, 1(X10)
	MOVB	X16, 2(X10)
	MOVB	X17, 3(X10)
	ADD	$4, X10
	ADD	$4, X11
	ADD	$-4, X12
	BGE	X12, X9, f_loop4

f_loop1:
	BEQZ	X12, done
	MOVB	0(X11), X14
	MOVB	X14, 0(X10)
	ADD	$1, X10
	ADD	$1, X11
	ADD	$-1, X12
	JMP	f_loop1
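// Overlapping regions where the destination lies above the source are
// copied from the highest address downward, so every source byte is read
// before the copy can overwrite it. Both pointers are first advanced past
// the end of their buffers and then walked backward.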
backward:
	ADD	X10, X12, X10
	ADD	X11, X12, X11

	// If less than 8 bytes, do single byte copies.
	MOV	$8, X9
	BLT	X12, X9, b_loop4_check

	// Check alignment - if alignment differs we have to do one byte at a time.
	AND	$3, X10, X5
	AND	$3, X11, X6
	BNE	X5, X6, b_loop8_unaligned_check
	BEQZ	X5, b_loop_check

	// Move one byte at a time until we reach 8 byte alignment.
	SUB	X5, X12, X12
b_align:
	ADD	$-1, X5
	ADD	$-1, X10
	ADD	$-1, X11
	MOVB	0(X11), X14
	MOVB	X14, 0(X10)
	BNEZ	X5, b_align

b_loop_check:
	MOV	$16, X9
	BLT	X12, X9, b_loop8_check
	MOV	$32, X9
	BLT	X12, X9, b_loop16_check
	MOV	$64, X9
	BLT	X12, X9, b_loop32_check
b_loop64:
	ADD	$-64, X10
	ADD	$-64, X11
	MOV	0(X11), X14
	MOV	8(X11), X15
	MOV	16(X11), X16
	MOV	24(X11), X17
	MOV	32(X11), X18
	MOV	40(X11), X19
	MOV	48(X11), X20
	MOV	56(X11), X21
	MOV	X14, 0(X10)
	MOV	X15, 8(X10)
	MOV	X16, 16(X10)
	MOV	X17, 24(X10)
	MOV	X18, 32(X10)
	MOV	X19, 40(X10)
	MOV	X20, 48(X10)
	MOV	X21, 56(X10)
	ADD	$-64, X12
	BGE	X12, X9, b_loop64
	BEQZ	X12, done

b_loop32_check:
	MOV	$32, X9
	BLT	X12, X9, b_loop16_check
b_loop32:
	ADD	$-32, X10
	ADD	$-32, X11
	MOV	0(X11), X14
	MOV	8(X11), X15
	MOV	16(X11), X16
	MOV	24(X11), X17
	MOV	X14, 0(X10)
	MOV	X15, 8(X10)
	MOV	X16, 16(X10)
	MOV	X17, 24(X10)
	ADD	$-32, X12
	BGE	X12, X9, b_loop32
	BEQZ	X12, done

b_loop16_check:
	MOV	$16, X9
	BLT	X12, X9, b_loop8_check
b_loop16:
	ADD	$-16, X10
	ADD	$-16, X11
	MOV	0(X11), X14
	MOV	8(X11), X15
	MOV	X14, 0(X10)
	MOV	X15, 8(X10)
	ADD	$-16, X12
	BGE	X12, X9, b_loop16
	BEQZ	X12, done

b_loop8_check:
	MOV	$8, X9
	BLT	X12, X9, b_loop4_check
b_loop8:
	ADD	$-8, X10
	ADD	$-8, X11
	MOV	0(X11), X14
	MOV	X14, 0(X10)
	ADD	$-8, X12
	BGE	X12, X9, b_loop8
	BEQZ	X12, done
	JMP	b_loop4_check

b_loop8_unaligned_check:
	MOV	$8, X9
	BLT	X12, X9, b_loop4_check
b_loop8_unaligned:
	ADD	$-8, X10
	ADD	$-8, X11
	MOVB	0(X11), X14
	MOVB	1(X11), X15
	MOVB	2(X11), X16
	MOVB	3(X11), X17
	MOVB	4(X11), X18
	MOVB	5(X11), X19
	MOVB	6(X11), X20
	MOVB	7(X11), X21
	MOVB	X14, 0(X10)
	MOVB	X15, 1(X10)
	MOVB	X16, 2(X10)
	MOVB	X17, 3(X10)
	MOVB	X18, 4(X10)
	MOVB	X19, 5(X10)
	MOVB	X20, 6(X10)
	MOVB	X21, 7(X10)
	ADD	$-8, X12
	BGE	X12, X9, b_loop8_unaligned

b_loop4_check:
	MOV	$4, X9
	BLT	X12, X9, b_loop1
b_loop4:
	ADD	$-4, X10
	ADD	$-4, X11
	MOVB	0(X11), X14
	MOVB	1(X11), X15
	MOVB	2(X11), X16
	MOVB	3(X11), X17
	MOVB	X14, 0(X10)
	MOVB	X15, 1(X10)
	MOVB	X16, 2(X10)
	MOVB	X17, 3(X10)
	ADD	$-4, X12
	BGE	X12, X9, b_loop4

b_loop1:
	BEQZ	X12, done
	ADD	$-1, X10
	ADD	$-1, X11
	MOVB	0(X11), X14
	MOVB	X14, 0(X10)
	ADD	$-1, X12
	JMP	b_loop1

done:
	RET
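For reference, a minimal Go sketch of the behaviour this routine provides: the copy built-in on overlapping byte slices follows memmove semantics, so the two calls below exercise the forward (destination below source) and backward (destination above source) paths. The buffer contents and expected output in the comments are only an illustration, not taken from the runtime sources.

package main

import "fmt"

func main() {
	// Backward case: destination starts above the source inside the same
	// buffer, so a naive forward copy would clobber bytes before reading them.
	b := []byte("0123456789")
	copy(b[2:], b[:8])     // overlapping, dst > src
	fmt.Println(string(b)) // 0101234567

	// Forward case: destination starts below the source.
	c := []byte("0123456789")
	copy(c[:8], c[2:])     // overlapping, dst < src
	fmt.Println(string(c)) // 2345678989
}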