// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import "github.com/bir3/gocompiler/src/cmd/compile/internal/base"

// tighten moves Values closer to the Blocks in which they are used.
// This can reduce the amount of register spilling required,
// if it doesn't also create more live values.
// A Value can be moved to any block that
// dominates all blocks in which it is used.
func tighten(f *Func) {
	if base.Flag.N != 0 && len(f.Blocks) < 10000 {
		// Skip the optimization in -N mode, except for huge functions.
		// Too many values live across blocks can cause pathological
		// behavior in the register allocator (see issue 52180).
		return
	}

	// canMove[v.ID] reports whether value v is a candidate for moving.
	canMove := f.Cache.allocBoolSlice(f.NumValues())
	defer f.Cache.freeBoolSlice(canMove)

	// Compute the memory states of each block.
	startMem := f.Cache.allocValueSlice(f.NumBlocks())
	defer f.Cache.freeValueSlice(startMem)
	endMem := f.Cache.allocValueSlice(f.NumBlocks())
	defer f.Cache.freeValueSlice(endMem)
	memState(f, startMem, endMem)

	// Pass 1: decide which values are even eligible to move.
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if v.Op.isLoweredGetClosurePtr() {
				// Must stay in the entry block.
				continue
			}
			switch v.Op {
			case OpPhi, OpArg, OpArgIntReg, OpArgFloatReg, OpSelect0, OpSelect1, OpSelectN:
				// Phis need to stay in their block.
				// Arg must stay in the entry block.
				// Tuple selectors must stay with the tuple generator.
				// SelectN is typically, ultimately, a register.
				continue
			}
			// Count arguments which will need a register.
			narg := 0
			for _, a := range v.Args {
				// SP and SB are special registers and have no effect on
				// the allocation of general-purpose registers.
				if a.needRegister() && a.Op != OpSB && a.Op != OpSP {
					narg++
				}
			}
			if narg >= 2 && !v.Type.IsFlags() {
				// Don't move values with more than one input, as that may
				// increase register pressure.
				// We make an exception for flags, as we want flag generators
				// moved next to uses (because we only have 1 flag register).
				continue
			}
			canMove[v.ID] = true
		}
	}

	// Build data structure for fast least-common-ancestor queries.
	lca := makeLCArange(f)

	// For each moveable value, record the block that dominates all uses found so far.
	target := f.Cache.allocBlockSlice(f.NumValues())
	defer f.Cache.freeBlockSlice(target)

	// Grab loop information.
	// We use this to make sure we don't tighten a value into a (deeper) loop.
	idom := f.Idom()
	loops := f.loopnest()
	loops.calculateDepths()

	// Iterate to a fixed point: moving a value can change the best
	// location for other values (e.g. its arguments), so recompute
	// targets after every round of moves.
	changed := true
	for changed {
		changed = false

		// Reset target
		for i := range target {
			target[i] = nil
		}

		// Compute target locations (for moveable values only).
		// target location = the least common ancestor of all uses in the dominator tree.
		for _, b := range f.Blocks {
			for _, v := range b.Values {
				for i, a := range v.Args {
					if !canMove[a.ID] {
						continue
					}
					use := b
					if v.Op == OpPhi {
						// A phi use happens at the end of the
						// corresponding predecessor block, not in b.
						use = b.Preds[i].b
					}
					if target[a.ID] == nil {
						target[a.ID] = use
					} else {
						target[a.ID] = lca.find(target[a.ID], use)
					}
				}
			}
			// Control values are used by the block itself.
			for _, c := range b.ControlValues() {
				if !canMove[c.ID] {
					continue
				}
				if target[c.ID] == nil {
					target[c.ID] = b
				} else {
					target[c.ID] = lca.find(target[c.ID], b)
				}
			}
		}

		// If the target location is inside a loop,
		// move the target location up to just before the loop head.
		for _, b := range f.Blocks {
			origloop := loops.b2l[b.ID]
			for _, v := range b.Values {
				t := target[v.ID]
				if t == nil {
					continue
				}
				targetloop := loops.b2l[t.ID]
				// Walk up out of any loop that is strictly deeper than
				// the loop v currently lives in, via the immediate
				// dominator of each loop header.
				for targetloop != nil && (origloop == nil || targetloop.depth > origloop.depth) {
					t = idom[targetloop.header.ID]
					target[v.ID] = t
					targetloop = loops.b2l[t.ID]
				}
			}
		}

		// Move values to target locations.
		for _, b := range f.Blocks {
			for i := 0; i < len(b.Values); i++ {
				v := b.Values[i]
				t := target[v.ID]
				if t == nil || t == b {
					// v is not moveable, or is already in correct place.
					continue
				}
				if mem := v.MemoryArg(); mem != nil {
					if startMem[t.ID] != mem {
						// We can't move a value with a memory arg unless the target block
						// has that memory arg as its starting memory.
						continue
					}
				}
				if f.pass.debug > 0 {
					b.Func.Warnl(v.Pos, "%v is moved", v.Op)
				}
				// Move v to the block which dominates its uses.
				t.Values = append(t.Values, v)
				v.Block = t
				// Remove v from b.Values with a swap-delete; decrement i
				// so the element swapped into slot i is also visited.
				last := len(b.Values) - 1
				b.Values[i] = b.Values[last]
				b.Values[last] = nil
				b.Values = b.Values[:last]
				changed = true
				i--
			}
		}
	}
}

// phiTighten moves constants closer to phi users.
// This pass avoids having lots of constants live for lots of the program.
// See issue 16407.
func phiTighten(f *Func) {
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if v.Op != OpPhi {
				continue
			}
			for i, a := range v.Args {
				if !a.rematerializeable() {
					continue // not a constant we can move around
				}
				if a.Block == b.Preds[i].b {
					continue // already in the right place
				}
				// Make a copy of a, put in predecessor block.
				v.SetArg(i, a.copyInto(b.Preds[i].b))
			}
		}
	}
}
// memState computes the memory state at the beginning and end of each block of
// the function. The memory state is represented by a value of mem type.
// The returned result is stored in startMem and endMem, and endMem is nil for
// blocks with no successors (Exit,Ret,RetJmp blocks). This algorithm is not
// suitable for infinite loop blocks that do not contain any mem operations.
// For example:
// b1:
//
//	(some values)
//
//	plain -> b2
//
// b2: <- b1 b2
// Plain -> b2
//
// Algorithm introduction:
//  1. The start memory state of a block is InitMem, a Phi node of type mem or
//     an incoming memory value.
//  2. The start memory state of a block is consistent with the end memory state
//     of its parent nodes. If the start memory state of a block is a Phi value,
//     then the end memory state of its parent nodes is consistent with the
//     corresponding argument value of the Phi node.
//  3. The algorithm first obtains the memory state of some blocks in the tree
//     in the first step. Then floods the known memory state to other nodes in
//     the second step.
func memState(f *Func, startMem, endMem []*Value) {
	// This slice contains the set of blocks that have had their startMem set but this
	// startMem value has not yet been propagated to the endMem of its predecessors
	changed := make([]*Block, 0)
	// First step, init the memory state of some blocks.
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			var mem *Value
			if v.Op == OpPhi {
				if v.Type.IsMemory() {
					mem = v
				}
			} else if v.Op == OpInitMem {
				mem = v // This is actually not needed.
			} else if a := v.MemoryArg(); a != nil && a.Block != b {
				// The only incoming memory value doesn't belong to this block.
				mem = a
			}
			if mem != nil {
				if old := startMem[b.ID]; old != nil {
					if old == mem {
						continue
					}
					// Two different candidates for the same block's start
					// memory indicates a broken SSA invariant.
					f.Fatalf("func %s, startMem[%v] has different values, old %v, new %v", f.Name, b, old, mem)
				}
				startMem[b.ID] = mem
				changed = append(changed, b)
			}
		}
	}

	// Second step, floods the known memory state of some blocks to others.
	// Worklist loop: pop a block whose start memory is known and push that
	// knowledge backward into its predecessors' end (and start) memory.
	for len(changed) != 0 {
		top := changed[0]
		changed = changed[1:]
		mem := startMem[top.ID]
		for i, p := range top.Preds {
			pb := p.b
			if endMem[pb.ID] != nil {
				continue
			}
			if mem.Op == OpPhi && mem.Block == top {
				// A phi merges per-predecessor memory: predecessor i
				// ends with the phi's i-th argument.
				endMem[pb.ID] = mem.Args[i]
			} else {
				endMem[pb.ID] = mem
			}
			if startMem[pb.ID] == nil {
				// A block with no mem ops passes its end memory
				// through unchanged from its start.
				startMem[pb.ID] = endMem[pb.ID]
				changed = append(changed, pb)
			}
		}
	}
}