// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ppc64 ppc64le

#include "textflag.h"

// func memmove(to, from unsafe.Pointer, n uintptr)
//
// Copies n bytes from 'from' to 'to'. The two ranges may overlap: when the
// destination starts inside the source range (0 <= to-from < n, unsigned)
// the copy runs from the high addresses downwards, otherwise it runs
// forwards.
//
// Register usage:
//	R3	destination pointer (advanced as the copy proceeds)
//	R4	source pointer (advanced as the copy proceeds)
//	R5	n, the byte count; never modified
//	R6	doubleword count n>>3; reloaded with the leftover doubleword
//		count (n>>3)&3 after a 32-byte loop
//	R7	tail byte count n&7
//	R8	scratch: data in flight, 32-byte chunk count, then the index 16
//	R9	scratch: dest - src in the backward path
//	CR0	result of n&7 on entry; reused by the later compares
//	CR1	doubleword count compared against 0
//	CR2	overlap test
//	VS32, VS33	data for the 32-byte loops
TEXT runtime·memmove(SB), NOSPLIT|NOFRAME, $0-24
	MOVD	to+0(FP), R3
	MOVD	from+8(FP), R4
	MOVD	n+16(FP), R5

	// Determine if there are doublewords to
	// copy so a more efficient move can be done.
check:
	ANDCC	$7, R5, R7	// R7: tail bytes to copy; CR0[EQ] set if none
	SRD	$3, R5, R6	// R6: double words to copy
	CMP	R6, $0, CR1	// CR1[EQ] set if no double words to copy

	// Determine overlap by subtracting dest - src and comparing against the
	// length. This catches the cases where src and dest are in different types
	// of storage such as stack and static to avoid doing backward move when not
	// necessary.

	SUB	R4, R3, R8	// dest - src
	CMPU	R8, R5, CR2	// < len?
	BC	12, 8, backward	// BLT CR2 backward

	// Copying forward if no overlap.

	BC	12, 6, noforwardlarge	// "BEQ CR1, noforwardlarge"
	SRDCC	$2, R6, R8		// R8: number of 32 byte chunks
	BNE	forward32setup		// at least one chunk: use the 32 byte loop
	MOVD	R6, CTR			// R6 = number of double words

	// Move double words.

forward8:
	MOVD	0(R4), R8		// load double word
	ADD	$8, R4
	MOVD	R8, 0(R3)		// store double word
	ADD	$8, R3
	BC	16, 0, forward8		// bdnz
	BR	noforwardlarge		// handle remainder

	// Prepare for moves of 32 bytes at a time.

forward32setup:
	DCBTST	(R3)			// prepare data cache
	DCBT	(R4)
	MOVD	R8, CTR			// number of 32 byte chunks
	MOVD	$16, R8			// index of the second 16 bytes

forward32:
	LXVD2X	(R4+R0), VS32		// load 16 bytes
	LXVD2X	(R4+R8), VS33
	ADD	$32, R4
	STXVD2X	VS32, (R3+R0)		// store 16 bytes
	STXVD2X	VS33, (R3+R8)
	ADD	$32, R3			// bump up for next set
	BC	16, 0, forward32	// bdnz
	RLDCLCC	$61, R5, $3, R6		// remaining doublewords: (n>>3)&3
	BEQ	noforwardlarge
	MOVD	R6, CTR			// set up the CTR
	BR	forward8

noforwardlarge:
	CMP	R7, $0			// any remaining bytes?
	BC	4, 1, LR		// ble lr

forwardtail:
	MOVD	R7, CTR			// move tail bytes

forwardtailloop:
	MOVBZ	0(R4), R8		// move single bytes
	ADD	$1, R4
	MOVBZ	R8, 0(R3)
	ADD	$1, R3
	BC	16, 0, forwardtailloop	// bdnz
	RET

backward:
	// Copying backwards proceeds by copying R7 bytes then copying R6 double words.
	// R3 and R4 are advanced to the end of the destination/source buffers
	// respectively and moved back as we copy.

	ADD	R5, R4, R4		// end of source
	ADD	R3, R5, R3		// end of dest

	BEQ	nobackwardtail		// CR0 still holds the n&7 result from check

	MOVD	R7, CTR			// bytes to move

backwardtailloop:
	MOVBZ	-1(R4), R8		// load the last byte not yet copied
	SUB	$1, R4
	MOVBZ	R8, -1(R3)
	SUB	$1, R3
	BC	16, 0, backwardtailloop	// bdnz

nobackwardtail:
	BC	4, 5, LR		// ble CR1 lr: no double words to copy

backwardlarge:
	MOVD	R6, CTR
	// Use VSX only when the buffers are at least 32 bytes apart and there
	// is at least one 32 byte chunk. R3 and R4 have been moved by the same
	// amount, so R3 - R4 is still dest - src, which is non-negative here.
	SUB	R4, R3, R9		// dest - src
	CMP	R9, $32
	BLT	backwardlargeloop	// distance < 32: copy by double words
	SRDCC	$2, R6, R8		// R8: number of 32 byte chunks
	BNE	backward32setup

backwardlargeloop:
	MOVD	-8(R4), R8
	SUB	$8, R4
	MOVD	R8, -8(R3)
	SUB	$8, R3
	BC	16, 0, backwardlargeloop	// bdnz
	RET

backward32setup:
	MOVD	R8, CTR			// number of 32 byte chunks
	MOVD	$16, R8			// index of the second 16 bytes

backward32loop:
	SUB	$32, R4
	SUB	$32, R3
	LXVD2X	(R4+R0), VS32		// load 16 bytes
	LXVD2X	(R4+R8), VS33
	STXVD2X	VS32, (R3+R0)		// store 16 bytes
	STXVD2X	VS33, (R3+R8)
	BC	16, 0, backward32loop	// bdnz
	// Only the double words that did not fill a 32 byte chunk are left.
	// Recompute that count from n, as the forward path does, and return
	// when it is zero: loading CTR with 0 would make bdnz wrap around.
	RLDCLCC	$61, R5, $3, R6		// remaining doublewords: (n>>3)&3
	BC	12, 2, LR		// beqlr: nothing left to copy
	MOVD	R6, CTR
	BR	backwardlargeloop