// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ppc64 || ppc64le
// +build ppc64 ppc64le

#include "textflag.h"

// See memmove Go doc for important implementation constraints.

// func memmove(to, from unsafe.Pointer, n uintptr)

// target address
#define TGT R3
// source address
#define SRC R4
// length to move
#define LEN R5
// number of doublewords
#define DWORDS R6
// number of bytes < 8
#define BYTES R7
// const 16 used as index
#define IDX16 R8
// temp used for copies, etc.
#define TMP R9
// number of 32 byte chunks
#define QWORDS R10

// memmove copies n bytes from 'from' to 'to'; the two regions may overlap.
//
// The length is split into DWORDS (n >> 3) and BYTES (n & 7). If
// (to - from), compared as unsigned, is below n the copy runs backward
// from the end of both buffers; otherwise it runs forward.
//
// Condition register usage:
//   CR0  scratch; on entry to 'backward' it still holds the result of
//        the ANDCC that computed BYTES
//   CR1  DWORDS compared with 0
//   CR2  (to - from) compared (unsigned) with n
//   CR3  copy of CR0 taken right after BYTES was computed (EQ => BYTES == 0)
TEXT runtime·memmove(SB), NOSPLIT|NOFRAME, $0-24
	MOVD	to+0(FP), TGT
	MOVD	from+8(FP), SRC
	MOVD	n+16(FP), LEN

	// Determine if there are doublewords to
	// copy so a more efficient move can be done
check:
	ANDCC	$7, LEN, BYTES	// R7: bytes to copy
	SRD	$3, LEN, DWORDS	// R6: double words to copy
	MOVFL	CR0, CR3	// save CR from ANDCC
	CMP	DWORDS, $0, CR1	// CR1[EQ] set if no double words to copy

	// Determine overlap by subtracting dest - src and comparing against the
	// length. This catches the cases where src and dest are in different types
	// of storage such as stack and static to avoid doing backward move when not
	// necessary.

	SUB	SRC, TGT, TMP	// dest - src
	CMPU	TMP, LEN, CR2	// < len?
	BC	12, 8, backward	// BLT CR2 backward

	// Copying forward if no overlap.

	BC	12, 6, checkbytes	// BEQ CR1, checkbytes
	SRDCC	$2, DWORDS, QWORDS	// 32 byte chunks?
	BEQ	lt32gt8			// < 32 bytes

	// Prepare for moves of 32 bytes at a time.

forward32setup:
	DCBTST	(TGT)			// prepare data cache
	DCBT	(SRC)
	MOVD	QWORDS, CTR		// Number of 32 byte chunks
	MOVD	$16, IDX16		// 16 for index

forward32:
	LXVD2X	(R0)(SRC), VS32		// load 16 bytes
	LXVD2X	(IDX16)(SRC), VS33	// load 16 bytes
	ADD	$32, SRC
	STXVD2X	VS32, (R0)(TGT)		// store 16 bytes
	STXVD2X	VS33, (IDX16)(TGT)
	ADD	$32, TGT		// bump up for next set
	BC	16, 0, forward32	// bdnz: continue while CTR != 0
	ANDCC	$3, DWORDS		// remaining doublewords
	BEQ	checkbytes		// only bytes remain

lt32gt8:
	// At this point >= 8 and < 32
	// Move 16 bytes if possible
	CMP	DWORDS, $2
	BLT	lt16
	LXVD2X	(R0)(SRC), VS32
	ADD	$-2, DWORDS
	STXVD2X	VS32, (R0)(TGT)
	ADD	$16, SRC
	ADD	$16, TGT

lt16:	// Move 8 bytes if possible
	CMP	DWORDS, $1
	BLT	checkbytes
	MOVD	0(SRC), TMP
	ADD	$8, SRC
	MOVD	TMP, 0(TGT)
	ADD	$8, TGT
checkbytes:
	BC	12, 14, LR		// BEQ CR3 lr: no tail bytes, done
lt8:	// Move word if possible
	CMP	BYTES, $4
	BLT	lt4
	MOVWZ	0(SRC), TMP
	ADD	$-4, BYTES
	MOVW	TMP, 0(TGT)
	ADD	$4, SRC
	ADD	$4, TGT
lt4:	// Move halfword if possible
	CMP	BYTES, $2
	BLT	lt2
	MOVHZ	0(SRC), TMP
	ADD	$-2, BYTES
	MOVH	TMP, 0(TGT)
	ADD	$2, SRC
	ADD	$2, TGT
lt2:	// Move last byte if 1 left
	CMP	BYTES, $1
	BC	12, 0, LR		// BLT CR0 lr: BYTES < 1, done
	MOVBZ	0(SRC), TMP
	MOVBZ	TMP, 0(TGT)
	RET

backward:
	// Copying backwards proceeds by copying R7 bytes then copying R6 double words.
	// R3 and R4 are advanced to the end of the destination/source buffers
	// respectively and moved back as we copy.

	ADD	LEN, SRC, SRC		// end of source
	ADD	TGT, LEN, TGT		// end of dest

	BEQ	nobackwardtail		// CR0 still from ANDCC: BYTES == 0

	MOVD	BYTES, CTR		// bytes to move

backwardtailloop:
	MOVBZ	-1(SRC), TMP		// copy the byte just below the cursors
	SUB	$1, SRC
	MOVBZ	TMP, -1(TGT)
	SUB	$1, TGT
	BC	16, 0, backwardtailloop	// bdnz

nobackwardtail:
	BC	4, 5, LR		// ble CR1 lr: no doublewords, done

backwardlarge:
	MOVD	DWORDS, CTR		// default: 8 bytes at a time
	// Both cursors were advanced by the same amount, so TGT - SRC is still
	// dest - src. TMP has to be recomputed because the tail loop used it.
	SUB	SRC, TGT, TMP		// Use vsx if moving
	CMP	TMP, $32		// at least 32 byte chunks
	BLT	backwardlargeloop	// and distance >= 32
	SRDCC	$2, DWORDS, QWORDS	// 32 byte chunks
	BNE	backward32setup

backwardlargeloop:
	MOVD	-8(SRC), TMP
	SUB	$8, SRC
	MOVD	TMP, -8(TGT)
	SUB	$8, TGT
	BC	16, 0, backwardlargeloop	// bdnz
	RET

backward32setup:
	MOVD	QWORDS, CTR		// set up loop ctr
	MOVD	$16, IDX16		// 32 bytes at a time

backward32loop:
	SUB	$32, TGT
	SUB	$32, SRC
	LXVD2X	(R0)(SRC), VS32		// load 32 bytes from the source
	LXVD2X	(IDX16)(SRC), VS33
	STXVD2X	VS32, (R0)(TGT)		// store 32 bytes to the destination
	STXVD2X	VS33, (IDX16)(TGT)
	BC	16, 0, backward32loop	// bdnz
	// Only the doublewords not covered by the 32 byte chunks remain.
	ANDCC	$3, DWORDS		// remaining doublewords
	BC	12, 2, LR		// BEQ CR0 lr: nothing left, done
	MOVD	DWORDS, CTR
	BR	backwardlargeloop