// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ppc64 || ppc64le

#include "textflag.h"

// See memmove Go doc for important implementation constraints.

// func memmove(to, from unsafe.Pointer, n uintptr)
//
// Copies n bytes from 'from' to 'to'. The regions may overlap: when the
// unsigned difference (to - from) is smaller than n the copy runs backward
// (from the end of both buffers toward the start), otherwise it runs forward.
//
// The length is split into DWORDS = n >> 3 doublewords and BYTES = n & 7
// trailing bytes. The forward path moves the doublewords first (64, 32, 16
// and 8 bytes at a time) and the trailing bytes last; the backward path moves
// the trailing bytes first and the doublewords afterwards (32 bytes at a
// time, then up to three single doublewords).
//
// Condition register fields set on entry:
//   CR0: result of ANDCC $7, LEN (EQ when BYTES == 0); reused by later
//        CMP/ANDCC/SRDCC instructions
//   CR1: DWORDS compared with 0
//   CR2: unsigned compare of (to - from) with n
//   CR3: copy of the entry CR0, tested at checkbytes

// target address
#define TGT R3
// source address
#define SRC R4
// length to move
#define LEN R5
// number of doublewords
#define DWORDS R6
// number of bytes < 8
#define BYTES R7
// const 16 used as index
#define IDX16 R8
// temp used for copies, etc.
#define TMP R9
// number of 32 byte blocks (DWORDS >> 2), used by the backward copy
#define QWORDS R10
// index values: const 32 and const 48 used as indexes in the 64 byte loop
#define IDX32 R14
#define IDX48 R15
// number of 64 byte chunks (DWORDS >> 3), used by the forward copy
#define OCTWORDS R16

TEXT runtime·memmove<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-24
	// R3 = TGT = to
	// R4 = SRC = from
	// R5 = LEN = n

	// Determine if there are doublewords to
	// copy so a more efficient move can be done
check:
	ANDCC	$7, LEN, BYTES	// R7: bytes to copy; CR0[EQ] set if there are none
	SRD	$3, LEN, DWORDS	// R6: double words to copy
	MOVFL	CR0, CR3	// save CR from ANDCC so checkbytes can test it later
	CMP	DWORDS, $0, CR1	// CR1[EQ] set if no double words to copy

	// Determine overlap by subtracting dest - src and comparing against the
	// length. This catches the cases where src and dest are in different types
	// of storage such as stack and static to avoid doing backward move when not
	// necessary.

	SUB	SRC, TGT, TMP	// dest - src
	CMPU	TMP, LEN, CR2	// unsigned (dest - src) < len?
	BC	12, 8, backward	// BLT CR2 backward

	// Copying forward if no overlap.

	BC	12, 6, checkbytes	// BEQ CR1, checkbytes: no doublewords, only tail bytes
	SRDCC	$3, DWORDS, OCTWORDS	// 64 byte chunks? Sets CR0 from the result
	MOVD	$16, IDX16	// needed by lt64gt8 as well as by the 64 byte loop
	BEQ	lt64gt8	// < 64 bytes

	// Prepare for moves of 64 bytes at a time.

forward64setup:
	DCBTST	(TGT)	// prepare data cache
	DCBT	(SRC)
	MOVD	OCTWORDS, CTR	// Number of 64 byte chunks
	MOVD	$32, IDX32
	MOVD	$48, IDX48
	PCALIGN	$32	// align the loop that follows

forward64:
	// All four loads are issued before any store in each iteration.
	LXVD2X	(R0)(SRC), VS32	// load 64 bytes
	LXVD2X	(IDX16)(SRC), VS33
	LXVD2X	(IDX32)(SRC), VS34
	LXVD2X	(IDX48)(SRC), VS35
	ADD	$64, SRC
	STXVD2X	VS32, (R0)(TGT)	// store 64 bytes
	STXVD2X	VS33, (IDX16)(TGT)
	STXVD2X	VS34, (IDX32)(TGT)
	STXVD2X	VS35, (IDX48)(TGT)
	ADD	$64,TGT	// bump up for next set
	BC	16, 0, forward64	// bdnz: decrement CTR, continue while it is nonzero
	ANDCC	$7, DWORDS	// remaining doublewords (DWORDS &= 7)
	BEQ	checkbytes	// only bytes remain

lt64gt8:
	// Fewer than 8 doublewords remain here.
	// Move 32 bytes if possible
	CMP	DWORDS, $4
	BLT	lt32gt8
	LXVD2X	(R0)(SRC), VS32
	LXVD2X	(IDX16)(SRC), VS33
	ADD	$-4, DWORDS
	STXVD2X	VS32, (R0)(TGT)
	STXVD2X	VS33, (IDX16)(TGT)
	ADD	$32, SRC
	ADD	$32, TGT

lt32gt8:
	// At this point fewer than 4 doublewords (< 32 bytes) remain
	// to be moved as doublewords.
	// Move 16 bytes if possible
	CMP	DWORDS, $2
	BLT	lt16
	LXVD2X	(R0)(SRC), VS32
	ADD	$-2, DWORDS
	STXVD2X	VS32, (R0)(TGT)
	ADD	$16, SRC
	ADD	$16, TGT

lt16:	// Move 8 bytes if possible
	CMP	DWORDS, $1
	BLT	checkbytes
	MOVD	0(SRC), TMP
	ADD	$8, SRC
	MOVD	TMP, 0(TGT)
	ADD	$8, TGT
checkbytes:
	// CR3 holds the result of the entry ANDCC: EQ means BYTES == 0.
	BC	12, 14, LR	// BEQ CR3, lr: return if there are no tail bytes
lt8:	// Move word if possible
	CMP	BYTES, $4
	BLT	lt4
	MOVWZ	0(SRC), TMP
	ADD	$-4, BYTES
	MOVW	TMP, 0(TGT)
	ADD	$4, SRC
	ADD	$4, TGT
lt4:	// Move halfword if possible
	CMP	BYTES, $2
	BLT	lt2
	MOVHZ	0(SRC), TMP
	ADD	$-2, BYTES
	MOVH	TMP, 0(TGT)
	ADD	$2, SRC
	ADD	$2, TGT
lt2:	// Move last byte if 1 left
	CMP	BYTES, $1
	BC	12, 0, LR	// bltlr: return if BYTES < 1
	MOVBZ	0(SRC), TMP
	MOVBZ	TMP, 0(TGT)
	RET

backward:
	// Copying backwards proceeds by copying R7 bytes then copying R6 double words.
	// R3 and R4 are advanced to the end of the destination/source buffers
	// respectively and moved back as we copy.

	ADD	LEN, SRC, SRC	// end of source
	ADD	TGT, LEN, TGT	// end of dest

	// CR0 is still the result of the entry ANDCC: EQ means BYTES == 0.
	BEQ	nobackwardtail	// earlier condition

	MOVD	BYTES, CTR	// bytes to move

backwardtailloop:
	MOVBZ	-1(SRC), TMP	// load the byte just below the current end of source
	SUB	$1,SRC
	MOVBZ	TMP, -1(TGT)	// store it just below the current end of dest
	SUB	$1,TGT
	BDNZ	backwardtailloop

nobackwardtail:
	BC	4, 5, LR	// blelr cr1, return if DWORDS == 0
	SRDCC	$2,DWORDS,QWORDS	// Compute number of 32B blocks and compare to 0
	BNE	backward32setup	// If QWORDS != 0, start the 32B copy loop.

backward24:
	// DWORDS is a value between 1-3.
	// The single CMP below sets CR0 for both conditional returns; the
	// intervening MOVDs do not modify it.
	CMP	DWORDS, $2

	MOVD	-8(SRC), TMP
	MOVD	TMP, -8(TGT)
	BC	12, 0, LR	// bltlr, return if DWORDS == 1

	MOVD	-16(SRC), TMP
	MOVD	TMP, -16(TGT)
	BC	12, 2, LR	// beqlr, return if DWORDS == 2

	MOVD	-24(SRC), TMP
	MOVD	TMP, -24(TGT)
	RET

backward32setup:
	// CR0 set here is tested only after the loop; nothing in the loop body
	// writes CR0.
	ANDCC	$3,DWORDS	// Compute remaining DWORDS and compare to 0
	MOVD	QWORDS, CTR	// set up loop ctr
	MOVD	$16, IDX16	// 32 bytes at a time

backward32loop:
	SUB	$32, TGT
	SUB	$32, SRC
	LXVD2X	(R0)(SRC), VS32	// load 16x2 bytes
	LXVD2X	(IDX16)(SRC), VS33
	STXVD2X	VS32, (R0)(TGT)	// store 16x2 bytes
	STXVD2X	VS33, (IDX16)(TGT)
	BDNZ	backward32loop
	BC	12, 2, LR	// beqlr, return if DWORDS == 0
	BR	backward24	// 1-3 doublewords remain