// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ppc64 ppc64le

#include "textflag.h"

// See memmove Go doc for important implementation constraints.

// func memmove(to, from unsafe.Pointer, n uintptr)
//
// Copies n bytes from 'from' to 'to'. The copy runs forward unless
// (to - from), taken as an unsigned value, is less than n; in that case the
// destination starts inside the source range and the copy runs backward so
// that source bytes are read before they are overwritten.
//
// Condition register usage:
//   CR0  scratch for the compare/record-form instruction just executed
//   CR1  DWORDS compared with 0
//   CR2  (to - from) compared unsigned with n (overlap test)
//   CR3  copy of CR0 after ANDCC $7, i.e. EQ is set when BYTES == 0
//
// Branch encodings used below (BC BO, BI, target):
//   BO=12 branch if CR bit BI is set, BO=4 branch if CR bit BI is clear,
//   BO=16 decrement CTR and branch if CTR != 0 (bdnz).
//   BI = 4*field + bit, with bit 0=LT, 1=GT, 2=EQ.

// target address
#define TGT R3
// source address
#define SRC R4
// length to move
#define LEN R5
// number of doublewords
#define DWORDS R6
// number of bytes < 8
#define BYTES R7
// const 16 used as index
#define IDX16 R8
// temp used for copies, etc.
#define TMP R9
// number of 32 byte chunks
#define QWORDS R10

TEXT runtime·memmove(SB), NOSPLIT|NOFRAME, $0-24
	MOVD	to+0(FP), TGT
	MOVD	from+8(FP), SRC
	MOVD	n+16(FP), LEN

	// Determine if there are doublewords to
	// copy so a more efficient move can be done
check:
	ANDCC	$7, LEN, BYTES	// BYTES: tail bytes to copy (LEN mod 8), sets CR0
	SRD	$3, LEN, DWORDS	// DWORDS: double words to copy (LEN / 8)
	MOVFL	CR0, CR3	// save CR from ANDCC
	CMP	DWORDS, $0, CR1	// CR1[EQ] set if no double words to copy

	// Determine overlap by subtracting dest - src and comparing against the
	// length. This catches the cases where src and dest are in different types
	// of storage such as stack and static to avoid doing backward move when not
	// necessary.

	SUB	SRC, TGT, TMP	// dest - src
	CMPU	TMP, LEN, CR2	// < len?
	BC	12, 8, backward	// BLT CR2 backward

	// Copying forward if no overlap.

	BC	12, 6, checkbytes	// BEQ CR1, checkbytes
	SRDCC	$2, DWORDS, QWORDS	// 32 byte chunks?
	BEQ	lt32gt8			// < 32 bytes

	// Prepare for moves of 32 bytes at a time.

forward32setup:
	DCBTST	(TGT)			// prepare data cache
	DCBT	(SRC)
	MOVD	QWORDS, CTR		// Number of 32 byte chunks
	MOVD	$16, IDX16		// 16 for index

forward32:
	LXVD2X	(R0)(SRC), VS32		// load 16 bytes
	LXVD2X	(IDX16)(SRC), VS33	// load 16 bytes
	ADD	$32, SRC
	STXVD2X	VS32, (R0)(TGT)		// store 16 bytes
	STXVD2X	VS33, (IDX16)(TGT)
	ADD	$32, TGT		// bump up for next set
	BC	16, 0, forward32	// bdnz: continue while chunks remain
	ANDCC	$3, DWORDS		// remaining doublewords
	BEQ	checkbytes		// only bytes remain

lt32gt8:
	// At this point >= 8 and < 32
	// Move 16 bytes if possible
	CMP	DWORDS, $2
	BLT	lt16
	LXVD2X	(R0)(SRC), VS32
	ADD	$-2, DWORDS
	STXVD2X	VS32, (R0)(TGT)
	ADD	$16, SRC
	ADD	$16, TGT

lt16:	// Move 8 bytes if possible
	CMP	DWORDS, $1
	BLT	checkbytes
	MOVD	0(SRC), TMP
	ADD	$8, SRC
	MOVD	TMP, 0(TGT)
	ADD	$8, TGT
checkbytes:
	BC	12, 14, LR		// beqlr CR3: return if no tail bytes
lt8:	// Move word if possible
	CMP	BYTES, $4
	BLT	lt4
	MOVWZ	0(SRC), TMP
	ADD	$-4, BYTES
	MOVW	TMP, 0(TGT)
	ADD	$4, SRC
	ADD	$4, TGT
lt4:	// Move halfword if possible
	CMP	BYTES, $2
	BLT	lt2
	MOVHZ	0(SRC), TMP
	ADD	$-2, BYTES
	MOVH	TMP, 0(TGT)
	ADD	$2, SRC
	ADD	$2, TGT
lt2:	// Move last byte if 1 left
	CMP	BYTES, $1
	BC	12, 0, LR		// bltlr: return if BYTES < 1
	MOVBZ	0(SRC), TMP
	MOVBZ	TMP, 0(TGT)
	RET

backward:
	// Copying backwards proceeds by copying the BYTES tail one byte at a
	// time and then copying DWORDS double words. TGT and SRC are advanced to
	// the end of the destination/source buffers respectively and moved back
	// as we copy.

	ADD	LEN, SRC, SRC		// end of source
	ADD	TGT, LEN, TGT		// end of dest

	BEQ	nobackwardtail		// CR0 still holds the ANDCC $7 result

	MOVD	BYTES, CTR		// bytes to move

backwardtailloop:
	MOVBZ	-1(SRC), TMP		// point to last byte
	SUB	$1, SRC
	MOVBZ	TMP, -1(TGT)
	SUB	$1, TGT
	BC	16, 0, backwardtailloop	// bdnz

nobackwardtail:
	BC	4, 5, LR		// blelr CR1: return if no double words

backwardlarge:
	MOVD	DWORDS, CTR
	// Use vsx if moving at least one 32 byte chunk and distance >= 32.
	// The distance is dest - src, the same quantity as in the overlap test
	// above (adding LEN to both pointers did not change it). Computing
	// src - dest here would give a value <= 0 on this path and the vsx loop
	// would never be selected.
	SUB	SRC, TGT, TMP		// dest - src
	CMP	TMP, $32
	BLT	backwardlargeloop
	SRDCC	$2, DWORDS, QWORDS	// 32 byte chunks
	BNE	backward32setup

backwardlargeloop:
	MOVD	-8(SRC), TMP
	SUB	$8, SRC
	MOVD	TMP, -8(TGT)
	SUB	$8, TGT
	BC	16, 0, backwardlargeloop	// bdnz
	RET

backward32setup:
	MOVD	QWORDS, CTR		// set up loop ctr
	MOVD	$16, IDX16		// 32 bytes at a time

backward32loop:
	SUB	$32, TGT
	SUB	$32, SRC
	// Both loads are issued before either store, so the 32 source bytes are
	// read before the overlapping destination can overwrite them.
	LXVD2X	(R0)(SRC), VS32		// load 16 bytes
	LXVD2X	(IDX16)(SRC), VS33
	STXVD2X	VS32, (R0)(TGT)		// store 16 bytes
	STXVD2X	VS33, (IDX16)(TGT)
	BC	16, 0, backward32loop	// bdnz
	// Only the double words not covered by the 32 byte chunks remain.
	ANDCC	$3, DWORDS		// remaining doublewords, sets CR0
	BC	12, 2, LR		// beqlr: return if none remain
	MOVD	DWORDS, CTR
	BR	backwardlargeloop