// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import "container/heap"

// Schedule "scores" for values within a block. Lower scores are
// scheduled closer to the top of the block, higher scores closer to
// the bottom. Values with equal scores are ordered by ValHeap.Less.
const (
	ScorePhi = iota // towards top of block
	ScoreReadTuple
	ScoreVarDef
	ScoreMemory
	ScoreDefault
	ScoreFlags
	ScoreControl // towards bottom of block
)

// ValHeap is a priority queue of *Value, ordered by the score table
// (indexed by value ID) and tie-broken by ValHeap.Less. It implements
// heap.Interface; Pop returns the value that should be scheduled
// LAST among the currently schedulable values (the schedule is
// assembled in reverse).
type ValHeap struct {
	a     []*Value
	score []int8
}

func (h ValHeap) Len() int      { return len(h.a) }
func (h ValHeap) Swap(i, j int) { a := h.a; a[i], a[j] = a[j], a[i] }

func (h *ValHeap) Push(x interface{}) {
	// Push and Pop use pointer receivers because they modify the slice's length,
	// not just its contents.
	v := x.(*Value)
	h.a = append(h.a, v)
}
func (h *ValHeap) Pop() interface{} {
	old := h.a
	n := len(old)
	x := old[n-1]
	h.a = old[0 : n-1]
	return x
}

// Less reports whether element i should pop before element j.
// Note the inverted comparisons: since Pop yields the value placed
// LAST in the block, "comes later in the block" means "pops first".
func (h ValHeap) Less(i, j int) bool {
	x := h.a[i]
	y := h.a[j]
	sx := h.score[x.ID]
	sy := h.score[y.ID]
	// Scores are small (0..ScoreControl), so int8 subtraction cannot overflow.
	if c := sx - sy; c != 0 {
		return c > 0 // higher score comes later.
	}
	if x.Line != y.Line { // Favor in-order line stepping
		return x.Line > y.Line
	}
	if x.Op != OpPhi {
		if c := len(x.Args) - len(y.Args); c != 0 {
			return c < 0 // smaller args comes later
		}
	}
	// Final deterministic tie-break on value ID.
	return x.ID > y.ID
}

// Schedule the Values in each Block. After this phase returns, the
// order of b.Values matters and is the order in which those values
// will appear in the assembly output. For now it generates a
// reasonable valid schedule using a priority queue. TODO(khr):
// schedule smarter.
func schedule(f *Func) {
	// For each value, the number of times it is used in the block
	// by values that have not been scheduled yet.
	uses := make([]int32, f.NumValues())

	// reusable priority queue
	priq := new(ValHeap)

	// "priority" for a value
	score := make([]int8, f.NumValues())

	// scheduling order. We queue values in this list in reverse order.
	var order []*Value

	// maps mem values to the next live memory value
	nextMem := make([]*Value, f.NumValues())
	// additional pretend arguments for each Value. Used to enforce load/store ordering.
	additionalArgs := make([][]*Value, f.NumValues())

	// Pass 1: assign a score to every value in every block.
	for _, b := range f.Blocks {
		// Compute score. Larger numbers are scheduled closer to the end of the block.
		for _, v := range b.Values {
			switch {
			case v.Op == OpAMD64LoweredGetClosurePtr || v.Op == OpPPC64LoweredGetClosurePtr || v.Op == OpARMLoweredGetClosurePtr || v.Op == OpARM64LoweredGetClosurePtr || v.Op == Op386LoweredGetClosurePtr || v.Op == OpMIPS64LoweredGetClosurePtr:
				// We also score GetLoweredClosurePtr as early as possible to ensure that the
				// context register is not stomped. GetLoweredClosurePtr should only appear
				// in the entry block where there are no phi functions, so there is no
				// conflict or ambiguity here.
				if b != f.Entry {
					f.Fatalf("LoweredGetClosurePtr appeared outside of entry block, b=%s", b.String())
				}
				score[v.ID] = ScorePhi
			case v.Op == OpPhi:
				// We want all the phis first.
				score[v.ID] = ScorePhi
			case v.Op == OpVarDef:
				// We want all the vardefs next.
				score[v.ID] = ScoreVarDef
			case v.Type.IsMemory():
				// Schedule stores as early as possible. This tends to
				// reduce register pressure. It also helps make sure
				// VARDEF ops are scheduled before the corresponding LEA.
				score[v.ID] = ScoreMemory
			case v.Op == OpSelect0 || v.Op == OpSelect1:
				// Schedule the pseudo-op of reading part of a tuple
				// immediately after the tuple-generating op, since
				// this value is already live. This also removes its
				// false dependency on the other part of the tuple.
				// Also ensures tuple is never spilled.
				score[v.ID] = ScoreReadTuple
			case v.Type.IsFlags() || v.Type.IsTuple():
				// Schedule flag register generation as late as possible.
				// This makes sure that we only have one live flags
				// value at a time.
				score[v.ID] = ScoreFlags
			default:
				score[v.ID] = ScoreDefault
			}
		}
	}

	// Pass 2: schedule each block independently using the scores.
	for _, b := range f.Blocks {
		// Find store chain for block.
		// Store chains for different blocks overwrite each other, so
		// the calculated store chain is good only for this block.
		for _, v := range b.Values {
			if v.Op != OpPhi && v.Type.IsMemory() {
				for _, w := range v.Args {
					if w.Type.IsMemory() {
						nextMem[w.ID] = v
					}
				}
			}
		}

		// Compute uses.
		for _, v := range b.Values {
			if v.Op == OpPhi {
				// If a value is used by a phi, it does not induce
				// a scheduling edge because that use is from the
				// previous iteration.
				continue
			}
			for _, w := range v.Args {
				if w.Block == b {
					uses[w.ID]++
				}
				// Any load must come before the following store.
				if v.Type.IsMemory() || !w.Type.IsMemory() {
					continue // not a load
				}
				// v is a load of memory w; s (if any) is the store
				// that overwrites w. Record a pretend edge s -> v so
				// that v is scheduled before s.
				s := nextMem[w.ID]
				if s == nil || s.Block != b {
					continue
				}
				additionalArgs[s.ID] = append(additionalArgs[s.ID], v)
				uses[v.ID]++
			}
		}

		if b.Control != nil && b.Control.Op != OpPhi {
			// Force the control value to be scheduled at the end,
			// unless it is a phi value (which must be first).
			score[b.Control.ID] = ScoreControl

			// Schedule values dependent on the control value at the end.
			// This reduces the number of register spills. We don't find
			// all values that depend on the control, just values with a
			// direct dependency. This is cheaper and in testing there
			// was no difference in the number of spills.
			for _, v := range b.Values {
				if v.Op != OpPhi {
					for _, a := range v.Args {
						if a == b.Control {
							score[v.ID] = ScoreControl
						}
					}
				}
			}
		}

		// To put things into a priority queue
		// The values that should come last are least.
		priq.score = score
		priq.a = priq.a[:0]

		// Initialize priority queue with schedulable values.
		// A value is schedulable once all its in-block uses have
		// been scheduled (uses[v.ID] == 0).
		for _, v := range b.Values {
			if uses[v.ID] == 0 {
				heap.Push(priq, v)
			}
		}

		// Schedule highest priority value, update use counts, repeat.
		order = order[:0]
		// tuples maps a tuple-generating value's ID to its pending
		// Select0/Select1 readers, held back so they can be emitted
		// immediately after (i.e. in the reversed order, just before)
		// the tuple op itself.
		tuples := make(map[ID][]*Value)
		for {
			// Find highest priority schedulable value.
			// Note that schedule is assembled backwards.

			if priq.Len() == 0 {
				break
			}

			v := heap.Pop(priq).(*Value)

			// Add it to the schedule.
			// Do not emit tuple-reading ops until we're ready to emit the tuple-generating op.
			//TODO: maybe remove ReadTuple score above, if it does not help on performance
			switch {
			case v.Op == OpSelect0:
				if tuples[v.Args[0].ID] == nil {
					tuples[v.Args[0].ID] = make([]*Value, 2)
				}
				tuples[v.Args[0].ID][0] = v
			case v.Op == OpSelect1:
				if tuples[v.Args[0].ID] == nil {
					tuples[v.Args[0].ID] = make([]*Value, 2)
				}
				tuples[v.Args[0].ID][1] = v
			case v.Type.IsTuple() && tuples[v.ID] != nil:
				// Flush the held-back selectors (Select1 first, since
				// order is reversed later), then fall through to emit
				// the tuple-generating op itself.
				if tuples[v.ID][1] != nil {
					order = append(order, tuples[v.ID][1])
				}
				if tuples[v.ID][0] != nil {
					order = append(order, tuples[v.ID][0])
				}
				delete(tuples, v.ID)
				fallthrough
			default:
				order = append(order, v)
			}

			// Update use counts of arguments.
			for _, w := range v.Args {
				if w.Block != b {
					continue
				}
				uses[w.ID]--
				if uses[w.ID] == 0 {
					// All uses scheduled, w is now schedulable.
					heap.Push(priq, w)
				}
			}
			// Also release the pretend load->store ordering edges.
			for _, w := range additionalArgs[v.ID] {
				uses[w.ID]--
				if uses[w.ID] == 0 {
					// All uses scheduled, w is now schedulable.
					heap.Push(priq, w)
				}
			}
		}
		if len(order) != len(b.Values) {
			f.Fatalf("schedule does not include all values")
		}
		// order was built back-to-front; reverse it into b.Values.
		for i := 0; i < len(b.Values); i++ {
			b.Values[i] = order[len(b.Values)-1-i]
		}
	}

	f.scheduled = true
}