github.com/x04/go/src@v0.0.0-20200202162449-3d481ceb3525/runtime/memmove_ppc64x.s (about)

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build ppc64 ppc64le
     6  
     7  #include "textflag.h"
     8  
     9  // See memmove Go doc for important implementation constraints.
    10  
    11  // func memmove(to, from unsafe.Pointer, n uintptr)
    12  //
    13  // Copies n bytes from 'from' to 'to'. When 0 <= to-from < n (unsigned) the
    14  // copy runs from the high addresses down, otherwise from the low addresses up.
    15  //
    16  // Register use:
    17  //	R3: destination cursor, R4: source cursor, R5: n
    18  //	R6: doublewords to copy (n >> 3), R7: tail bytes (n & 7)
    19  //	R8, R9: scratch, CTR: loop counter, VS32/VS33: 32 byte chunk data
    20  //	CR0: result of the n & 7 test until a later SRDCC/CMP/ANDCC overwrites it
    21  //	CR1: R6 compared with 0, CR2: (to - from) compared with n
    22  TEXT runtime·memmove(SB), NOSPLIT|NOFRAME, $0-24
    23  	MOVD	to+0(FP), R3
    24  	MOVD	from+8(FP), R4
    25  	MOVD	n+16(FP), R5
    26  
    27  	// Determine if there are doublewords to
    28  	// copy so a more efficient move can be done
    29  check:
    30  	ANDCC	$7, R5, R7	// R7: tail bytes to copy; CR0[EQ] set if none
    31  	SRD	$3, R5, R6	// R6: double words to copy
    32  	CMP	R6, $0, CR1	// CR1[EQ] set if no double words to copy
    33  
    34  	// Determine overlap by subtracting dest - src and comparing against the
    35  	// length.  This catches the cases where src and dest are in different types
    36  	// of storage such as stack and static to avoid doing backward move when not
    37  	// necessary.
    38  
    39  	SUB	R4, R3, R8	// dest - src
    40  	CMPU	R8, R5, CR2	// unsigned (dest - src) < len?
    41  	BC	12, 8, backward // BLT CR2 backward
    42  
    43  	// Copying forward if no overlap.
    44  
    45  	BC	12, 6, noforwardlarge	// "BEQ CR1, noforwardlarge"
    46  	SRDCC	$2,R6,R8		// R8 = number of 32 byte chunks
    47  	BNE	forward32setup		// at least one chunk: use the 32 byte loop
    48  	MOVD	R6,CTR			// R6 = number of double words
    49  
    50  	// Move double words
    51  
    52  forward8:
    53  	MOVD    0(R4), R8		// double word
    54  	ADD     $8,R4
    55  	MOVD    R8, 0(R3)		//
    56  	ADD     $8,R3
    57  	BC      16, 0, forward8		// bdnz
    58  	BR	noforwardlarge		// handle remainder
    59  
    60  	// Prepare for moves of 32 bytes at a time.
    61  
    62  forward32setup:
    63  	DCBTST	(R3)			// prepare data cache
    64  	DCBT	(R4)
    65  	MOVD	R8, CTR			// 32 byte chunk count
    66  	MOVD	$16, R8			// R8 = offset of the second 16 bytes
    67  
    68  forward32:
    69  	LXVD2X	(R4+R0), VS32		// load 16 bytes
    70  	LXVD2X	(R4+R8), VS33
    71  	ADD	$32, R4
    72  	STXVD2X	VS32, (R3+R0)		// store 16 bytes
    73  	STXVD2X	VS33, (R3+R8)
    74  	ADD	$32,R3			// bump up for next set
    75  	BC	16, 0, forward32	// continue
    76  	RLDCLCC	$61,R5,$3,R6		// remaining doublewords
    77  	BEQ	noforwardlarge
    78  	MOVD	R6,CTR			// set up the CTR
    79  	BR	forward8
    80  
    81  noforwardlarge:
    82  	CMP	R7,$0			// any remaining bytes
    83  	BC	4, 1, LR		// ble lr
    84  
    85  forwardtail:
    86  	MOVD	R7, CTR			// move tail bytes
    87  
    88  forwardtailloop:
    89  	MOVBZ	0(R4), R8		// move single bytes
    90  	ADD	$1,R4
    91  	MOVBZ	R8, 0(R3)
    92  	ADD	$1,R3
    93  	BC	16, 0, forwardtailloop	// bdnz
    94  	RET
    95  
    96  backward:
    97  	// Copying backwards proceeds by copying R7 bytes then copying R6 double words.
    98  	// R3 and R4 are advanced to the end of the destination/source buffers
    99  	// respectively and moved back as we copy.
   100  
   101  	ADD	R5, R4, R4		// end of source
   102  	ADD	R3, R5, R3		// end of dest
   103  
   104  	BEQ	nobackwardtail		// CR0 still holds the n & 7 test from check
   105  
   106  	MOVD	R7, CTR			// bytes to move
   107  
   108  backwardtailloop:
   109  	MOVBZ 	-1(R4), R8		// point to last byte
   110  	SUB	$1,R4
   111  	MOVBZ 	R8, -1(R3)
   112  	SUB	$1,R3
   113  	BC	16, 0, backwardtailloop // bdnz
   114  
   115  nobackwardtail:
   116  	BC	4, 5, LR		// ble CR1 lr
   117  
   118  backwardlarge:
   119  	MOVD	R6, CTR
   120  	SUB	R4, R3, R9		// dest - src, never negative on this path
   121  	CMP	R9, $32			// Use vsx if moving at least 32 byte chunks
   122  	BLT	backwardlargeloop	// and distance >= 32
   123  	SRDCC	$2,R6,R8		// R8 = number of 32 byte chunks
   124  	BNE	backward32setup
   125  
   126  backwardlargeloop:
   127  	MOVD 	-8(R4), R8
   128  	SUB	$8,R4
   129  	MOVD 	R8, -8(R3)
   130  	SUB	$8,R3
   131  	BC	16, 0, backwardlargeloop // bdnz
   132  	RET
   133  
   134  backward32setup:
   135  	MOVD	R8, CTR			// set up loop ctr
   136  	MOVD	$16, R8			// 32 bytes at a time
   137  
   138  backward32loop:
   139  	SUB	$32, R4
   140  	SUB	$32, R3
   141  	LXVD2X	(R4+R0), VS32           // load 16 bytes
   142  	LXVD2X	(R4+R8), VS33
   143  	STXVD2X	VS32, (R3+R0)           // store 16 bytes
   144  	STXVD2X	VS33, (R3+R8)
   145  	BC      16, 0, backward32loop   // bdnz
   146  	ANDCC	$3, R6, R6		// doublewords left after the 32 byte chunks
   147  	BC	12, 2, LR		// beqlr: nothing left to copy
   148  	MOVD	R6, CTR			// copy only the leftover doublewords
   149  	BR	backwardlargeloop