// SPDX-License-Identifier: Apache-2.0
// Copyright 2023 The Prime Citizens
//
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build pcz && (mips || mipsle)

#include "textflag.h"

// MOVWHI and MOVWLO pick the partial-word instruction (MOVWL or MOVWR) by
// byte order, so the code below is written once for both mips and mipsle:
//
//   MOVWHI off(R)  accesses the bytes from off(R) up to the end of the
//                  aligned word that contains off(R).
//   MOVWLO off(R)  accesses the bytes from the start of the aligned word
//                  that contains off(R) up to and including off(R).
//
// A "MOVWHI 0(R), Rx" / "MOVWLO 3(R), Rx" pair therefore loads one word from
// an address that need not be 4-byte aligned.
#ifdef GOARCH_mips
#define MOVWHI MOVWL
#define MOVWLO MOVWR
#else
#define MOVWHI MOVWR
#define MOVWLO MOVWL
#endif

// See memmove Go doc for important implementation constraints.

// func Move(to, from unsafe.Pointer, n uintptr)
//
// Move copies n bytes from `from` to `to`. When `to` is above `from` the copy
// runs from the end of the buffers toward the start; otherwise it runs from
// the start toward the end.
//
// Arguments are read from the caller's frame: to+0(FP), from+4(FP), n+8(FP).
//
// Register use:
//   R1      to     (forward write cursor; fixed start of dst when going backward)
//   R2      from   (forward read cursor;  fixed start of src when going backward)
//   R3      n, reduced by the bytes consumed while aligning the destination
//   R4      from+n (fixed end of src when going forward; backward read cursor)
//   R5      to+n   (fixed end of dst when going forward; backward write cursor)
//   R6      scratch; later the stop address of the 4-byte loop
//   R7      scratch; later the stop address of the 32-byte loop
//   R8-R15  data in flight
//
// In every multi-word chunk all loads are issued before the first store.
TEXT ·Move(SB),NOSPLIT,$-0-12
	MOVW	n+8(FP), R3
	MOVW	from+4(FP), R2
	MOVW	to+0(FP), R1

	ADDU	R3, R2, R4	// end pointer for source
	ADDU	R3, R1, R5	// end pointer for destination

	// if destination is ahead of source, start at the end of the buffer and go backward.
	SGTU	R1, R2, R6
	BNE	R6, backward

	// if less than 4 bytes, use byte by byte copying
	SGTU	$4, R3, R6
	BNE	R6, f_small_copy

	// align destination to 4 bytes
	AND	$3, R1, R6
	BEQ	R6, f_dest_aligned
	SUBU	R1, R0, R6	// R6 = -to
	AND	$3, R6		// R6 = bytes up to the next 4-byte boundary of to
	MOVWHI	0(R2), R7	// R7 = unaligned word at from (this and the MOVWLO below)
	SUBU	R6, R3
	MOVWLO	3(R2), R7
	ADDU	R6, R2
	MOVWHI	R7, 0(R1)	// store only the bytes from to up to its word boundary
	ADDU	R6, R1

f_dest_aligned:
	AND	$31, R3, R7
	AND	$3, R3, R6
	SUBU	R7, R5, R7	// end pointer for 32-byte chunks
	SUBU	R6, R5, R6	// end pointer for 4-byte chunks

	// if source is not aligned, use unaligned reads
	AND	$3, R2, R8
	BNE	R8, f_large_ua

	// Forward copy, source and destination both word aligned: 32 bytes per pass.
f_large:
	BEQ	R1, R7, f_words
	ADDU	$32, R1
	MOVW	0(R2), R8
	MOVW	4(R2), R9
	MOVW	8(R2), R10
	MOVW	12(R2), R11
	MOVW	16(R2), R12
	MOVW	20(R2), R13
	MOVW	24(R2), R14
	MOVW	28(R2), R15
	ADDU	$32, R2
	MOVW	R8, -32(R1)
	MOVW	R9, -28(R1)
	MOVW	R10, -24(R1)
	MOVW	R11, -20(R1)
	MOVW	R12, -16(R1)
	MOVW	R13, -12(R1)
	MOVW	R14, -8(R1)
	MOVW	R15, -4(R1)
	JMP	f_large

	// Forward copy, aligned: one word per pass until fewer than 4 bytes remain.
f_words:
	BEQ	R1, R6, f_tail
	ADDU	$4, R1
	MOVW	0(R2), R8
	ADDU	$4, R2
	MOVW	R8, -4(R1)
	JMP	f_words

	// Forward copy, aligned: 1-3 trailing bytes. Both cursors are word aligned
	// here, so one partial load and one partial store of the last word cover
	// exactly the remaining bytes.
f_tail:
	BEQ	R1, R5, ret
	MOVWLO	-1(R4), R8
	MOVWLO	R8, -1(R5)

ret:
	RET

	// Forward copy, destination aligned but source unaligned: 32 bytes per
	// pass, each word assembled from a MOVWHI/MOVWLO pair.
f_large_ua:
	BEQ	R1, R7, f_words_ua
	ADDU	$32, R1
	MOVWHI	0(R2), R8
	MOVWHI	4(R2), R9
	MOVWHI	8(R2), R10
	MOVWHI	12(R2), R11
	MOVWHI	16(R2), R12
	MOVWHI	20(R2), R13
	MOVWHI	24(R2), R14
	MOVWHI	28(R2), R15
	MOVWLO	3(R2), R8
	MOVWLO	7(R2), R9
	MOVWLO	11(R2), R10
	MOVWLO	15(R2), R11
	MOVWLO	19(R2), R12
	MOVWLO	23(R2), R13
	MOVWLO	27(R2), R14
	MOVWLO	31(R2), R15
	ADDU	$32, R2
	MOVW	R8, -32(R1)
	MOVW	R9, -28(R1)
	MOVW	R10, -24(R1)
	MOVW	R11, -20(R1)
	MOVW	R12, -16(R1)
	MOVW	R13, -12(R1)
	MOVW	R14, -8(R1)
	MOVW	R15, -4(R1)
	JMP	f_large_ua

	// Forward copy, unaligned source: one word per pass.
f_words_ua:
	BEQ	R1, R6, f_tail_ua
	MOVWHI	0(R2), R8
	ADDU	$4, R1
	MOVWLO	3(R2), R8
	ADDU	$4, R2
	MOVW	R8, -4(R1)
	JMP	f_words_ua

	// Forward copy, unaligned source: 1-3 trailing bytes. Load the last four
	// source bytes as one unaligned word, then store only the part that falls
	// in the final destination word.
f_tail_ua:
	BEQ	R1, R5, ret
	MOVWHI	-4(R4), R8
	MOVWLO	-1(R4), R8
	MOVWLO	R8, -1(R5)
	JMP	ret

	// Forward copy of fewer than 4 bytes (including n == 0), one byte per pass.
f_small_copy:
	BEQ	R1, R5, ret
	ADDU	$1, R1
	MOVB	0(R2), R6
	ADDU	$1, R2
	MOVB	R6, -1(R1)
	JMP	f_small_copy

	// Backward copy: R4/R5 walk down from the ends toward R2/R1.
backward:
	// if less than 4 bytes, use byte by byte copying
	SGTU	$4, R3, R6
	BNE	R6, b_small_copy

	// align the destination end pointer to 4 bytes
	AND	$3, R5, R6	// R6 = bytes of dst that lie past the last word boundary
	BEQ	R6, b_dest_aligned
	MOVWHI	-4(R4), R7	// R7 = last four source bytes (this and the MOVWLO below)
	SUBU	R6, R3
	MOVWLO	-1(R4), R7
	SUBU	R6, R4
	MOVWLO	R7, -1(R5)	// store only the bytes from the word boundary up to the end
	SUBU	R6, R5

b_dest_aligned:
	AND	$31, R3, R7
	AND	$3, R3, R6
	ADDU	R7, R1, R7	// stop pointer for 32-byte chunks
	ADDU	R6, R1, R6	// stop pointer for 4-byte chunks

	// if source end is not aligned, use unaligned reads
	AND	$3, R4, R8
	BNE	R8, b_large_ua

	// Backward copy, both end pointers word aligned: 32 bytes per pass.
b_large:
	BEQ	R5, R7, b_words
	ADDU	$-32, R5
	MOVW	-4(R4), R8
	MOVW	-8(R4), R9
	MOVW	-12(R4), R10
	MOVW	-16(R4), R11
	MOVW	-20(R4), R12
	MOVW	-24(R4), R13
	MOVW	-28(R4), R14
	MOVW	-32(R4), R15
	ADDU	$-32, R4
	MOVW	R8, 28(R5)
	MOVW	R9, 24(R5)
	MOVW	R10, 20(R5)
	MOVW	R11, 16(R5)
	MOVW	R12, 12(R5)
	MOVW	R13, 8(R5)
	MOVW	R14, 4(R5)
	MOVW	R15, 0(R5)
	JMP	b_large

	// Backward copy, aligned: one word per pass until fewer than 4 bytes remain.
b_words:
	BEQ	R5, R6, b_tail
	ADDU	$-4, R5
	MOVW	-4(R4), R8
	ADDU	$-4, R4
	MOVW	R8, 0(R5)
	JMP	b_words

	// Backward copy, aligned: 1-3 leading bytes.
b_tail:
	BEQ	R5, R1, ret
	MOVWHI	0(R2), R8	// R2 and R1 have the same alignment so we don't need to load a whole word
	MOVWHI	R8, 0(R1)
	JMP	ret

	// Backward copy, destination aligned but source unaligned: 32 bytes per
	// pass, each word assembled from a MOVWHI/MOVWLO pair.
b_large_ua:
	BEQ	R5, R7, b_words_ua
	ADDU	$-32, R5
	MOVWHI	-4(R4), R8
	MOVWHI	-8(R4), R9
	MOVWHI	-12(R4), R10
	MOVWHI	-16(R4), R11
	MOVWHI	-20(R4), R12
	MOVWHI	-24(R4), R13
	MOVWHI	-28(R4), R14
	MOVWHI	-32(R4), R15
	MOVWLO	-1(R4), R8
	MOVWLO	-5(R4), R9
	MOVWLO	-9(R4), R10
	MOVWLO	-13(R4), R11
	MOVWLO	-17(R4), R12
	MOVWLO	-21(R4), R13
	MOVWLO	-25(R4), R14
	MOVWLO	-29(R4), R15
	ADDU	$-32, R4
	MOVW	R8, 28(R5)
	MOVW	R9, 24(R5)
	MOVW	R10, 20(R5)
	MOVW	R11, 16(R5)
	MOVW	R12, 12(R5)
	MOVW	R13, 8(R5)
	MOVW	R14, 4(R5)
	MOVW	R15, 0(R5)
	JMP	b_large_ua

	// Backward copy, unaligned source: one word per pass.
b_words_ua:
	BEQ	R5, R6, b_tail_ua
	MOVWHI	-4(R4), R8
	ADDU	$-4, R5
	MOVWLO	-1(R4), R8
	ADDU	$-4, R4
	MOVW	R8, 0(R5)
	JMP	b_words_ua

	// Backward copy, unaligned source: 1-3 leading bytes. Load the first four
	// source bytes as one unaligned word, then store only the part that falls
	// in the first destination word.
b_tail_ua:
	BEQ	R5, R1, ret
	MOVWHI	(R2), R8
	MOVWLO	3(R2), R8
	MOVWHI	R8, 0(R1)
	JMP	ret

	// Backward copy of fewer than 4 bytes, one byte per pass.
b_small_copy:
	BEQ	R5, R1, ret
	ADDU	$-1, R5
	MOVB	-1(R4), R6
	ADDU	$-1, R4
	MOVB	R6, 0(R5)
	JMP	b_small_copy