github.com/aloncn/graphics-go@v0.0.1/src/runtime/memmove_arm64.s (about)

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "textflag.h"
     6  
     7  // void runtime·memmove(void *to, void *from, uintptr n)
     8  //
     9  // Copies n bytes from "from" to "to". The copy runs forward (lowest address
    10  // first) unless from is below to, in which case it runs backward (highest
    11  // address first) so that an overlapping source is read before it is
    12  // overwritten.
    13  //
    14  // Register use:
    15  //	R3	destination cursor
    16  //	R4	source cursor
    17  //	R5	n
    18  //	R6	n&7, the bytes copied one at a time
    19  //	R7	n&~7, the bytes copied eight at a time
    20  //	R8	scratch for the value being moved
    21  //	R9	destination address at which the current loop stops
    22  TEXT runtime·memmove(SB), NOSPLIT, $-8-24
    23  	MOVD	to+0(FP), R3
    24  	MOVD	from+8(FP), R4
    25  	MOVD	n+16(FP), R5
    26  	CMP	$0, R5
    27  	BNE	check
    28  	RET			// n == 0: nothing to copy
    29  
    30  check:
    31  	AND	$~7, R5, R7	// R7 is N&~7
    32  	// TODO(mwhudson): this is written this way to avoid tickling
    33  	// warnings from addpool when written as AND $7, R5, R6 (see
    34  	// https://golang.org/issue/12708)
    35  	SUB	R7, R5, R6	// R6 is N&7
    36  
    37  	// Addresses are unsigned quantities, so pick the direction with an
    38  	// unsigned comparison: BLO is taken when from (R4) is below to (R3).
    39  	// A signed BLT would order the two pointers the wrong way round
    40  	// whenever exactly one of them has its top bit set.
    41  	CMP	R3, R4
    42  	BLO	backward
    43  
    44  	// Copying forward proceeds by copying R7/8 words then copying R6 bytes.
    45  	// R3 and R4 are advanced as we copy.
    46  
    47  	// (There may be implementations of armv8 where copying by bytes until
    48  	// at least one of source or dest is word aligned is a worthwhile
    49  	// optimization, but on the one tested so far (xgene) it did not
    50  	// make a significant difference.)
    51  
    52  	CMP	$0, R7		// Do we need to do any word-by-word copying?
    53  	BEQ	noforwardlarge
    54  
    55  	ADD	R3, R7, R9	// R9 points just past where we copy by word
    56  
    57  forwardlargeloop:
    58  	MOVD.P	8(R4), R8	// .P is post-increment: load 8 bytes, then R4 += 8
    59  	MOVD.P	R8, 8(R3)	// store 8 bytes, then R3 += 8
    60  	CMP	R3, R9
    61  	BNE	forwardlargeloop
    62  
    63  noforwardlarge:
    64  	CMP	$0, R6		// Do we need to do any byte-by-byte copying?
    65  	BNE	forwardtail
    66  	RET
    67  
    68  forwardtail:
    69  	ADD	R3, R6, R9	// R9 points just past the destination memory
    70  
    71  forwardtailloop:
    72  	MOVBU.P	1(R4), R8	// load one byte, then R4 += 1
    73  	MOVBU.P	R8, 1(R3)	// store one byte, then R3 += 1
    74  	CMP	R3, R9
    75  	BNE	forwardtailloop
    76  	RET
    77  
    78  backward:
    79  	// Copying backwards proceeds by copying R6 bytes then copying R7/8 words.
    80  	// R3 and R4 are advanced to the end of the destination/source buffers
    81  	// respectively and moved back as we copy.
    82  
    83  	ADD	R4, R5, R4	// R4 points just past the last source byte
    84  	ADD	R3, R5, R3	// R3 points just past the last destination byte
    85  
    86  	CMP	$0, R6		// Do we need to do any byte-by-byte copying?
    87  	BEQ	nobackwardtail
    88  
    89  	SUB	R6, R3, R9	// R9 points at the lowest destination byte that should be copied by byte.
    90  backwardtailloop:
    91  	MOVBU.W	-1(R4), R8	// .W is pre-decrement with writeback: R4 -= 1, then load one byte
    92  	MOVBU.W	R8, -1(R3)	// R3 -= 1, then store one byte
    93  	CMP	R9, R3
    94  	BNE	backwardtailloop
    95  
    96  nobackwardtail:
    97  	CMP	$0, R7		// Do we need to do any word-by-word copying?
    98  	BNE	backwardlarge
    99  	RET
   100  
   101  backwardlarge:
   102  	SUB	R7, R3, R9	// R9 points at the lowest destination byte
   103  
   104  backwardlargeloop:
   105  	MOVD.W	-8(R4), R8	// R4 -= 8, then load 8 bytes
   106  	MOVD.W	R8, -8(R3)	// R3 -= 8, then store 8 bytes
   107  	CMP	R9, R3
   108  	BNE	backwardlargeloop
   109  	RET