github.com/sanprasirt/go@v0.0.0-20170607001320-a027466e4b6d/src/runtime/memmove_ppc64x.s (about)

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build ppc64 ppc64le
     6  
     7  #include "textflag.h"
     8  
     9  // void runtime·memmove(void*, void*, uintptr)
    10  TEXT runtime·memmove(SB), NOSPLIT|NOFRAME, $0-24
    11  	MOVD	to+0(FP), R3
    12  	MOVD	from+8(FP), R4
    13  	MOVD	n+16(FP), R5
    14  
    15  	// Determine if there are doublewords to
    16  	// copy so a more efficient move can be done
    17  check:
    18  	ANDCC	$7, R5, R7	// R7: bytes to copy
    19  	SRAD	$3, R5, R6	// R6: double words to copy
    20  	CMP	R6, $0, CR1	// CR1[EQ] set if no double words to copy
    21  
    22  	// Determine overlap by subtracting dest - src and comparing against the
    23  	// length.  The catches the cases where src and dest are in different types
    24  	// of storage such as stack and static to avoid doing backward move when not
    25  	// necessary.
    26  
    27  	SUB	R4, R3, R8	// dest - src
    28  	CMPU	R8, R5, CR2	// < len?
    29  	BC	12, 8, backward // BLT CR2 backward
    30  
    31  	// Copying forward if no overlap.
    32  
    33  	BC	12, 6, noforwardlarge	// "BEQ CR1, noforwardlarge"
    34  	MOVD	R6,CTR			// R6 = number of double words
    35  	SRADCC	$2,R6,R8		// 32 byte chunks?
    36  	BNE	forward32setup		//
    37  
    38  	// Move double words
    39  
    40  forward8:
    41  	MOVD    0(R4), R8		// double word
    42  	ADD     $8,R4
    43  	MOVD    R8, 0(R3)		//
    44  	ADD     $8,R3
    45  	BC      16, 0, forward8
    46  	BR	noforwardlarge		// handle remainder
    47  
    48  	// Prepare for moves of 32 bytes at a time.
    49  
    50  forward32setup:
    51  	DCBTST	(R3)			// prepare data cache
    52  	DCBT	(R4)
    53  	MOVD	R8, CTR			// double work count
    54  
    55  forward32:
    56  	MOVD	0(R4), R8		// load 4 double words
    57  	MOVD	8(R4), R9
    58  	MOVD	16(R4), R14
    59  	MOVD	24(R4), R15
    60  	ADD	$32,R4
    61  	MOVD	R8, 0(R3)		// store those 4
    62  	MOVD	R9, 8(R3)
    63  	MOVD	R14,16(R3)
    64  	MOVD	R15,24(R3)
    65  	ADD	$32,R3			// bump up for next set
    66  	BC	16, 0, forward32	// continue
    67  	RLDCLCC	$61,R5,$3,R6		// remaining doublewords
    68  	BEQ	noforwardlarge
    69  	MOVD	R6,CTR			// set up the CTR
    70  	BR	forward8
    71  
    72  noforwardlarge:
    73  	CMP	R7,$0			// any remaining bytes
    74  	BC	4, 1, LR
    75  
    76  forwardtail:
    77  	MOVD	R7, CTR			// move tail bytes
    78  
    79  forwardtailloop:
    80  	MOVBZ	0(R4), R8		// move single bytes
    81  	ADD	$1,R4
    82  	MOVBZ	R8, 0(R3)
    83  	ADD	$1,R3
    84  	BC	16, 0, forwardtailloop
    85  	RET
    86  
    87  backward:
    88  	// Copying backwards proceeds by copying R7 bytes then copying R6 double words.
    89  	// R3 and R4 are advanced to the end of the destination/source buffers
    90  	// respectively and moved back as we copy.
    91  
    92  	ADD	R5, R4, R4		// end of source
    93  	ADD	R3, R5, R3		// end of dest
    94  
    95  	BEQ	nobackwardtail		// earlier condition
    96  
    97  	MOVD	R7, CTR			// bytes to move
    98  
    99  backwardtailloop:
   100  	MOVBZ 	-1(R4), R8		// point to last byte
   101  	SUB	$1,R4
   102  	MOVBZ 	R8, -1(R3)
   103  	SUB	$1,R3
   104  	BC	16, 0, backwardtailloop
   105  
   106  nobackwardtail:
   107  	CMP	R6,$0
   108  	BC	4, 5, LR
   109  
   110  backwardlarge:
   111  	MOVD	R6, CTR
   112  
   113  backwardlargeloop:
   114  	MOVD 	-8(R4), R8
   115  	SUB	$8,R4
   116  	MOVD 	R8, -8(R3)
   117  	SUB	$8,R3
   118  	BC	16, 0, backwardlargeloop	//
   119  	RET