// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Retrieved from:
// github.com/ice-blockchain/go/src@v0.0.0-20240403114104-1564d284e521/runtime/memmove_ppc64x.s

//go:build ppc64 || ppc64le

#include "textflag.h"

// See memmove Go doc for important implementation constraints.

// func memmove(to, from unsafe.Pointer, n uintptr)
//
// Overview of the routine below:
//   - With GOPPC64_power10, lengths <= 16 are copied with a single
//     variable-length vector load/store pair.
//   - Otherwise LEN is split into DWORDS (LEN >> 3) and BYTES (LEN & 7).
//   - If (to - from) is, as an unsigned value, smaller than LEN, the copy
//     runs backward (tail bytes first, then doublewords from the end).
//     In every other case the copy runs forward in 64/32/16/8/4/2/1 byte
//     steps.
//
// Condition register fields are used as long-lived flags, so the order of
// the flag-setting instructions matters:
//   CR0 - result of "ANDCC $7, LEN, BYTES" (EQ when there are no tail bytes);
//         still live when the backward path is entered, and reused by the
//         later CMP/ANDCC/SRDCC instructions.
//   CR1 - DWORDS compared with 0.
//   CR2 - unsigned compare of (to - from) with LEN.
//   CR3 - copy of CR0 taken right after the ANDCC, consumed at checkbytes.
//
// Raw BC forms used below: "BC 12, n" branches when CR bit n is set,
// "BC 4, n" branches when CR bit n is clear, and "BC 16, 0" decrements CTR
// and branches while CTR != 0. Bit n is field n/4, with LT, GT, EQ at
// n%4 = 0, 1, 2.

// target address
#define TGT R3
// source address
#define SRC R4
// length to move
#define LEN R5
// number of doublewords
#define DWORDS R6
// number of bytes < 8
#define BYTES R7
// const 16 used as index
#define IDX16 R8
// temp used for copies, etc.
#define TMP R9
// number of 32 byte chunks (DWORDS >> 2), used by the backward copy
#define QWORDS R10
// index values (const 32 and const 48)
#define IDX32 R14
#define IDX48 R15
// number of 64 byte chunks (DWORDS >> 3), used by the forward copy
#define OCTWORDS R16

TEXT runtime·memmove<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-24
	// R3 = TGT = to
	// R4 = SRC = from
	// R5 = LEN = n

	// Determine if there are doublewords to
	// copy so a more efficient move can be done
check:
#ifdef GOPPC64_power10
	// Short copy: LEN <= 16 is handled by one variable-length vector
	// load followed by one variable-length vector store. The whole source
	// is loaded before anything is stored, so no overlap check is needed.
	CMP	LEN, $16
	BGT	mcopy
	// LXVL/STXVL read the byte count from the most significant byte of
	// the length register, hence the shift left by 56.
	SLD	$56, LEN, TMP
	LXVL	SRC, TMP, V0
	STXVL	V0, TGT, TMP
	RET
#endif
mcopy:
	ANDCC	$7, LEN, BYTES	// R7: bytes to copy; CR0[EQ] set if none
	SRD	$3, LEN, DWORDS	// R6: double words to copy
	MOVFL	CR0, CR3	// save CR from ANDCC
	CMP	DWORDS, $0, CR1	// CR1[EQ] set if no double words to copy

	// Determine overlap by subtracting dest - src and comparing against the
	// length. This catches the cases where src and dest are in different types
	// of storage such as stack and static to avoid doing backward move when not
	// necessary.
	//
	// The compare is unsigned: when dest is below src the difference wraps
	// to a large value, so the forward path is taken.

	SUB	SRC, TGT, TMP	// dest - src
	CMPU	TMP, LEN, CR2	// < len?
	BC	12, 8, backward	// BLT CR2 backward

	// Copying forward if no overlap.

	BC	12, 6, checkbytes	// BEQ CR1, checkbytes
	SRDCC	$3, DWORDS, OCTWORDS	// 64 byte chunks?
	MOVD	$16, IDX16
	BEQ	lt64gt8			// < 64 bytes

	// Prepare for moves of 64 bytes at a time.

forward64setup:
	DCBTST	(TGT)			// prepare data cache
	DCBT	(SRC)
	MOVD	OCTWORDS, CTR		// Number of 64 byte chunks
	MOVD	$32, IDX32
	MOVD	$48, IDX48
	PCALIGN	$16

forward64:
	LXVD2X	(R0)(SRC), VS32		// load 64 bytes
	LXVD2X	(IDX16)(SRC), VS33
	LXVD2X	(IDX32)(SRC), VS34
	LXVD2X	(IDX48)(SRC), VS35
	ADD	$64, SRC
	STXVD2X	VS32, (R0)(TGT)		// store 64 bytes
	STXVD2X	VS33, (IDX16)(TGT)
	STXVD2X	VS34, (IDX32)(TGT)
	STXVD2X	VS35, (IDX48)(TGT)
	ADD	$64,TGT			// bump up for next set
	BC	16, 0, forward64	// bdnz: continue while CTR != 0
	ANDCC	$7, DWORDS		// remaining doublewords
	BEQ	checkbytes		// only bytes remain

lt64gt8:
	// DWORDS is between 1 and 7 here.
	// Move 32 bytes if possible
	CMP	DWORDS, $4
	BLT	lt32gt8
	LXVD2X	(R0)(SRC), VS32
	LXVD2X	(IDX16)(SRC), VS33
	ADD	$-4, DWORDS
	STXVD2X	VS32, (R0)(TGT)
	STXVD2X	VS33, (IDX16)(TGT)
	ADD	$32, SRC
	ADD	$32, TGT

lt32gt8:
	// At this point >= 8 and < 32
	// Move 16 bytes if possible
	CMP	DWORDS, $2
	BLT	lt16
	LXVD2X	(R0)(SRC), VS32
	ADD	$-2, DWORDS
	STXVD2X	VS32, (R0)(TGT)
	ADD	$16, SRC
	ADD	$16, TGT

lt16:	// Move 8 bytes if possible
	CMP	DWORDS, $1
	BLT	checkbytes
#ifdef GOPPC64_power10
	// One doubleword remains: fold it together with the tail bytes into a
	// single variable-length vector copy of BYTES+8 (at most 15) bytes.
	ADD	$8, BYTES
	SLD	$56, BYTES, TMP		// byte count goes in the top byte
	LXVL	SRC, TMP, V0
	STXVL	V0, TGT, TMP
	RET
#endif

	MOVD	0(SRC), TMP
	ADD	$8, SRC
	MOVD	TMP, 0(TGT)
	ADD	$8, TGT
checkbytes:
	// CR3 still holds the result of the initial "ANDCC $7, LEN, BYTES".
	BC	12, 14, LR		// beqlr cr3, return if BYTES == 0
#ifdef GOPPC64_power10
	// Copy the remaining 1-7 bytes with one variable-length vector
	// load/store.
	SLD	$56, BYTES, TMP		// byte count goes in the top byte
	LXVL	SRC, TMP, V0
	STXVL	V0, TGT, TMP
	RET
#endif
lt8:	// Move word if possible
	CMP	BYTES, $4
	BLT	lt4
	MOVWZ	0(SRC), TMP
	ADD	$-4, BYTES
	MOVW	TMP, 0(TGT)
	ADD	$4, SRC
	ADD	$4, TGT
lt4:	// Move halfword if possible
	CMP	BYTES, $2
	BLT	lt2
	MOVHZ	0(SRC), TMP
	ADD	$-2, BYTES
	MOVH	TMP, 0(TGT)
	ADD	$2, SRC
	ADD	$2, TGT
lt2:	// Move last byte if 1 left
	CMP	BYTES, $1
	BC	12, 0, LR	// bltlr, return if BYTES == 0
	MOVBZ	0(SRC), TMP
	MOVBZ	TMP, 0(TGT)
	RET

backward:
	// Copying backwards proceeds by copying R7 bytes then copying R6 double words.
	// R3 and R4 are advanced to the end of the destination/source buffers
	// respectively and moved back as we copy.

	ADD	LEN, SRC, SRC	// end of source
	ADD	TGT, LEN, TGT	// end of dest

	// CR0 has not been modified since "ANDCC $7, LEN, BYTES" on this path,
	// so EQ here means there are no tail bytes to copy.
	BEQ	nobackwardtail	// earlier condition

	MOVD	BYTES, CTR	// bytes to move

backwardtailloop:
	// Copy the BYTES tail bytes one at a time, highest address first.
	MOVBZ	-1(SRC), TMP	// point to last byte
	SUB	$1,SRC
	MOVBZ	TMP, -1(TGT)
	SUB	$1,TGT
	BDNZ	backwardtailloop

nobackwardtail:
	BC	4, 5, LR		// blelr cr1, return if DWORDS == 0
	SRDCC	$2,DWORDS,QWORDS	// Compute number of 32B blocks and compare to 0
	BNE	backward32setup		// If QWORDS != 0, start the 32B copy loop.

backward24:
	// DWORDS is a value between 1-3.
	// The single compare below feeds both conditional returns; the MOVD
	// loads and stores in between leave CR0 untouched.
	CMP	DWORDS, $2

	MOVD	-8(SRC), TMP
	MOVD	TMP, -8(TGT)
	BC	12, 0, LR	// bltlr, return if DWORDS == 1

	MOVD	-16(SRC), TMP
	MOVD	TMP, -16(TGT)
	BC	12, 2, LR	// beqlr, return if DWORDS == 2

	MOVD	-24(SRC), TMP
	MOVD	TMP, -24(TGT)
	RET

backward32setup:
	// The CR0 result of this ANDCC is consumed after the loop; nothing in
	// the loop body modifies CR0.
	ANDCC	$3,DWORDS	// Compute remaining DWORDS and compare to 0
	MOVD	QWORDS, CTR	// set up loop ctr
	MOVD	$16, IDX16	// 32 bytes at a time
	PCALIGN	$16

backward32loop:
	SUB	$32, TGT
	SUB	$32, SRC
	LXVD2X	(R0)(SRC), VS32		// load 16x2 bytes
	LXVD2X	(IDX16)(SRC), VS33
	STXVD2X	VS32, (R0)(TGT)		// store 16x2 bytes
	STXVD2X	VS33, (IDX16)(TGT)
	BDNZ	backward32loop
	BC	12, 2, LR	// beqlr, return if DWORDS == 0
	BR	backward24	// 1-3 doublewords remain