github.com/primecitizens/pcz/std@v0.2.1/core/mem/move_mipsx.s (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright 2023 The Prime Citizens
     3  //
     4  // Copyright 2016 The Go Authors. All rights reserved.
     5  // Use of this source code is governed by a BSD-style
     6  // license that can be found in the LICENSE file.
     7  
     8  //go:build pcz && (mips || mipsle)
     9  
    10  #include "textflag.h"
    11  
    12  #ifdef GOARCH_mips
        // MOVWHI and MOVWLO give the two partial-word move instructions names that
        // the copy routine below can use without caring which build this is.
        // NOTE(review): MOVWL/MOVWR presumably assemble to the MIPS LWL/LWR loads
        // and SWL/SWR stores, and GOARCH_mips is presumably the big-endian port;
        // confirm against the Go assembler and the MIPS ISA manual.
        //
        // GOARCH_mips build: MOVWHI is MOVWL, MOVWLO is MOVWR.
    13  #define MOVWHI  MOVWL
    14  #define MOVWLO  MOVWR
    15  #else
        // Every other build of this file (the build constraint above only leaves
        // mipsle): the mapping is swapped.
    16  #define MOVWHI  MOVWR
    17  #define MOVWLO  MOVWL
    18  #endif
    19  
    20  // See memmove Go doc for important implementation constraints.
    21  
    22  // func Move(to, from unsafe.Pointer, n uintptr)
        //
        // Move copies n bytes from `from` to `to`. The direction is picked by an
        // unsigned comparison of the two pointers: when to > from the copy runs
        // from the end of the buffers toward the start ("backward"), otherwise it
        // runs from the start toward the end (labels prefixed f_).
        // NOTE(review): the direction choice is presumably what keeps overlapping
        // ranges correct; see the memmove documentation referenced above.
        //
        // The three arguments are read from 0(FP), 4(FP) and 8(FP).
        //
        // Both directions share the same shape:
        //   1. n < 4: plain byte loop (f_small_copy / b_small_copy).
        //   2. Otherwise bring the destination pointer to a 4-byte boundary with
        //      one partial-word store.
        //   3. Copy 32 bytes per iteration, then 4 bytes per iteration. The *_ua
        //      variants are used when the source pointer is not 4-byte aligned
        //      and build every word from a MOVWHI/MOVWLO pair.
        //   4. Finish the remaining bytes with one partial-word store
        //      (f_tail / f_tail_ua / b_tail / b_tail_ua).
        //
        // Register roles:
        //   R1      to; advances on the forward path, never written on the backward path
        //   R2      from; advances on the forward path, never written on the backward path
        //   R3      n, reduced by the bytes consumed while aligning the destination
        //   R4      from+n; walks down on the backward path, never written on the forward path
        //   R5      to+n; walks down on the backward path, never written on the forward path
        //   R6      scratch, then the stop pointer of the 4-byte loop
        //   R7      scratch, then the stop pointer of the 32-byte loop
        //   R8-R15  data in flight
        //
        // MOVWHI and MOVWLO are the macros defined near the top of this file.
        // NOTE(review): the comments below assume that a MOVWHI at address p
        // touches the bytes from p up to the end of the aligned word containing p,
        // and a MOVWLO at p touches the bytes from the start of that aligned word
        // through p (LWL/LWR/SWL/SWR behaviour) - confirm against the MIPS ISA manual.
    23  TEXT ·Move(SB),NOSPLIT,$-0-12
    24  	MOVW n+8(FP), R3
    25  	MOVW from+4(FP), R2
    26  	MOVW to+0(FP), R1
    27  
    28  	ADDU R3, R2, R4 // end pointer for source
    29  	ADDU R3, R1, R5 // end pointer for destination
    30  
    31  	// if destination is ahead of source, start at the end of the buffer and go backward.
    32  	SGTU R1, R2, R6
    33  	BNE R6, backward
    34  
    35  	// if less than 4 bytes, use byte by byte copying
    36  	SGTU $4, R3, R6
    37  	BNE R6, f_small_copy
    38  
    39  	// align destination to 4 bytes
    40  	AND $3, R1, R6
    41  	BEQ R6, f_dest_aligned
        	// R6 = (R0 - R1) & 3. NOTE(review): with R0 as the MIPS zero register this
        	// is the number of bytes (1..3) up to the next 4-byte boundary - confirm.
    42  	SUBU R1, R0, R6
    43  	AND $3, R6
        	// Load a full word from the (possibly unaligned) source into R7, then
        	// store only its leading part so that R1 lands on the boundary. The
        	// pointer and count updates are interleaved with the memory accesses.
    44  	MOVWHI 0(R2), R7
    45  	SUBU R6, R3
    46  	MOVWLO 3(R2), R7
    47  	ADDU R6, R2
    48  	MOVWHI R7, 0(R1)
    49  	ADDU R6, R1
    50  
    51  f_dest_aligned:
        	// R3 is the remaining byte count here; derive the loop stop pointers
        	// from the destination end pointer R5.
    52  	AND $31, R3, R7
    53  	AND $3, R3, R6
    54  	SUBU R7, R5, R7 // end pointer for 32-byte chunks
    55  	SUBU R6, R5, R6 // end pointer for 4-byte chunks
    56  
    57  	// if source is not aligned, use unaligned reads
    58  	AND $3, R2, R8
    59  	BNE R8, f_large_ua
    60  
        // Forward, source word-aligned: 32 bytes per iteration until R1 reaches R7.
        // All eight loads are issued before the eight stores.
    61  f_large:
    62  	BEQ R1, R7, f_words
    63  	ADDU $32, R1
    64  	MOVW 0(R2), R8
    65  	MOVW 4(R2), R9
    66  	MOVW 8(R2), R10
    67  	MOVW 12(R2), R11
    68  	MOVW 16(R2), R12
    69  	MOVW 20(R2), R13
    70  	MOVW 24(R2), R14
    71  	MOVW 28(R2), R15
    72  	ADDU $32, R2
        	// R1 was already advanced, hence the negative store offsets.
    73  	MOVW R8, -32(R1)
    74  	MOVW R9, -28(R1)
    75  	MOVW R10, -24(R1)
    76  	MOVW R11, -20(R1)
    77  	MOVW R12, -16(R1)
    78  	MOVW R13, -12(R1)
    79  	MOVW R14, -8(R1)
    80  	MOVW R15, -4(R1)
    81  	JMP f_large
    82  
        // Forward, source word-aligned: one word per iteration until R1 reaches R6.
    83  f_words:
    84  	BEQ R1, R6, f_tail
    85  	ADDU $4, R1
    86  	MOVW 0(R2), R8
    87  	ADDU $4, R2
    88  	MOVW R8, -4(R1)
    89  	JMP f_words
    90  
        // Forward tail: nothing left when R1 == R5; otherwise move the trailing
        // bytes with a single partial load/store addressed by the last source and
        // destination bytes (R4-1 and R5-1).
    91  f_tail:
    92  	BEQ R1, R5, ret
    93  	MOVWLO -1(R4), R8
    94  	MOVWLO R8, -1(R5)
    95  
        // Common exit; the other paths jump here.
    96  ret:
    97  	RET
    98  
        // Forward, source not word-aligned: same loop as f_large, but every word is
        // assembled from a MOVWHI at offset k and a MOVWLO at offset k+3.
    99  f_large_ua:
   100  	BEQ R1, R7, f_words_ua
   101  	ADDU $32, R1
   102  	MOVWHI 0(R2), R8
   103  	MOVWHI 4(R2), R9
   104  	MOVWHI 8(R2), R10
   105  	MOVWHI 12(R2), R11
   106  	MOVWHI 16(R2), R12
   107  	MOVWHI 20(R2), R13
   108  	MOVWHI 24(R2), R14
   109  	MOVWHI 28(R2), R15
   110  	MOVWLO 3(R2), R8
   111  	MOVWLO 7(R2), R9
   112  	MOVWLO 11(R2), R10
   113  	MOVWLO 15(R2), R11
   114  	MOVWLO 19(R2), R12
   115  	MOVWLO 23(R2), R13
   116  	MOVWLO 27(R2), R14
   117  	MOVWLO 31(R2), R15
   118  	ADDU $32, R2
        	// Destination is word-aligned, so plain word stores are used.
   119  	MOVW R8, -32(R1)
   120  	MOVW R9, -28(R1)
   121  	MOVW R10, -24(R1)
   122  	MOVW R11, -20(R1)
   123  	MOVW R12, -16(R1)
   124  	MOVW R13, -12(R1)
   125  	MOVW R14, -8(R1)
   126  	MOVW R15, -4(R1)
   127  	JMP f_large_ua
   128  
        // Forward, source not word-aligned: one word per iteration until R1 reaches R6.
   129  f_words_ua:
   130  	BEQ R1, R6, f_tail_ua
   131  	MOVWHI 0(R2), R8
   132  	ADDU $4, R1
   133  	MOVWLO 3(R2), R8
   134  	ADDU $4, R2
   135  	MOVW R8, -4(R1)
   136  	JMP f_words_ua
   137  
        // Forward tail, source not word-aligned: load the last four source bytes
        // (R4-4 .. R4-1) as one word, then store only the trailing part ending at
        // R5-1.
   138  f_tail_ua:
   139  	BEQ R1, R5, ret
   140  	MOVWHI -4(R4), R8
   141  	MOVWLO -1(R4), R8
   142  	MOVWLO R8, -1(R5)
   143  	JMP ret
   144  
        // Forward byte loop for n < 4: runs until R1 reaches the destination end R5.
   145  f_small_copy:
   146  	BEQ R1, R5, ret
   147  	ADDU $1, R1
   148  	MOVB 0(R2), R6
   149  	ADDU $1, R2
   150  	MOVB R6, -1(R1)
   151  	JMP f_small_copy
   152  
        // Backward copy: mirrors the forward path, working down from the end
        // pointers R4/R5. R1 and R2 keep the original to/from values throughout.
   153  backward:
        	// fewer than 4 bytes: byte loop
   154  	SGTU $4, R3, R6
   155  	BNE R6, b_small_copy
   156  
        	// Align the destination end pointer down to a 4-byte boundary:
        	// R6 = R5 & 3 is the number of bytes above that boundary.
   157  	AND $3, R5, R6
   158  	BEQ R6, b_dest_aligned
        	// Load the last four source bytes into R7 and store only the trailing
        	// part ending at R5-1, then step count and both end pointers down by R6.
   159  	MOVWHI -4(R4), R7
   160  	SUBU R6, R3
   161  	MOVWLO -1(R4), R7
   162  	SUBU R6, R4
   163  	MOVWLO R7, -1(R5)
   164  	SUBU R6, R5
   165  
   166  b_dest_aligned:
        	// Stop pointers measured up from the start of the destination:
        	// R7 ends the 32-byte loop, R6 ends the 4-byte loop.
   167  	AND $31, R3, R7
   168  	AND $3, R3, R6
   169  	ADDU R7, R1, R7
   170  	ADDU R6, R1, R6
   171  
        	// if the source end pointer is not word-aligned, use unaligned reads
   172  	AND $3, R4, R8
   173  	BNE R8, b_large_ua
   174  
        // Backward, source word-aligned: 32 bytes per iteration until R5 reaches R7.
   175  b_large:
   176  	BEQ R5, R7, b_words
   177  	ADDU $-32, R5
   178  	MOVW -4(R4), R8
   179  	MOVW -8(R4), R9
   180  	MOVW -12(R4), R10
   181  	MOVW -16(R4), R11
   182  	MOVW -20(R4), R12
   183  	MOVW -24(R4), R13
   184  	MOVW -28(R4), R14
   185  	MOVW -32(R4), R15
   186  	ADDU $-32, R4
        	// R5 was already moved down, hence the positive store offsets.
   187  	MOVW R8, 28(R5)
   188  	MOVW R9, 24(R5)
   189  	MOVW R10, 20(R5)
   190  	MOVW R11, 16(R5)
   191  	MOVW R12, 12(R5)
   192  	MOVW R13, 8(R5)
   193  	MOVW R14, 4(R5)
   194  	MOVW R15, 0(R5)
   195  	JMP b_large
   196  
        // Backward, source word-aligned: one word per iteration until R5 reaches R6.
   197  b_words:
   198  	BEQ R5, R6, b_tail
   199  	ADDU $-4, R5
   200  	MOVW -4(R4), R8
   201  	ADDU $-4, R4
   202  	MOVW R8, 0(R5)
   203  	JMP b_words
   204  
        // Backward tail: nothing left when R5 == R1; otherwise move the leading
        // bytes of the buffers, addressed through the original from/to pointers.
   205  b_tail:
   206  	BEQ R5, R1, ret
   207  	MOVWHI 0(R2), R8 // R2 and R1 have the same alignment so we don't need to load a whole word
   208  	MOVWHI R8, 0(R1)
   209  	JMP ret
   210  
        // Backward, source not word-aligned: same loop as b_large, but every word
        // is assembled from a MOVWHI at offset -k and a MOVWLO at offset -k+3.
   211  b_large_ua:
   212  	BEQ R5, R7, b_words_ua
   213  	ADDU $-32, R5
   214  	MOVWHI -4(R4), R8
   215  	MOVWHI -8(R4), R9
   216  	MOVWHI -12(R4), R10
   217  	MOVWHI -16(R4), R11
   218  	MOVWHI -20(R4), R12
   219  	MOVWHI -24(R4), R13
   220  	MOVWHI -28(R4), R14
   221  	MOVWHI -32(R4), R15
   222  	MOVWLO -1(R4), R8
   223  	MOVWLO -5(R4), R9
   224  	MOVWLO -9(R4), R10
   225  	MOVWLO -13(R4), R11
   226  	MOVWLO -17(R4), R12
   227  	MOVWLO -21(R4), R13
   228  	MOVWLO -25(R4), R14
   229  	MOVWLO -29(R4), R15
   230  	ADDU $-32, R4
   231  	MOVW R8, 28(R5)
   232  	MOVW R9, 24(R5)
   233  	MOVW R10, 20(R5)
   234  	MOVW R11, 16(R5)
   235  	MOVW R12, 12(R5)
   236  	MOVW R13, 8(R5)
   237  	MOVW R14, 4(R5)
   238  	MOVW R15, 0(R5)
   239  	JMP b_large_ua
   240  
        // Backward, source not word-aligned: one word per iteration until R5 reaches R6.
   241  b_words_ua:
   242  	BEQ R5, R6, b_tail_ua
   243  	MOVWHI -4(R4), R8
   244  	ADDU $-4, R5
   245  	MOVWLO -1(R4), R8
   246  	ADDU $-4, R4
   247  	MOVW R8, 0(R5)
   248  	JMP b_words_ua
   249  
        // Backward tail, source not word-aligned: load the first four source bytes
        // (R2 .. R2+3) as one word, then store only the leading part starting at R1.
   250  b_tail_ua:
   251  	BEQ R5, R1, ret
   252  	MOVWHI (R2), R8
   253  	MOVWLO 3(R2), R8
   254  	MOVWHI R8, 0(R1)
   255  	JMP ret
   256  
        // Backward byte loop for n < 4: runs until R5 comes down to the destination start R1.
   257  b_small_copy:
   258  	BEQ R5, R1, ret
   259  	ADDU $-1, R5
   260  	MOVB -1(R4), R6
   261  	ADDU $-1, R4
   262  	MOVB R6, 0(R5)
   263  	JMP b_small_copy