github.com/corona10/go@v0.0.0-20180224231303-7a218942be57/src/cmd/compile/internal/ssa/schedule.go

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import "container/heap"

const (
	ScorePhi = iota // towards top of block
	ScoreNilCheck
	ScoreReadTuple
	ScoreVarDef
	ScoreMemory
	ScoreDefault
	ScoreFlags
	ScoreControl // towards bottom of block
)

// A ValHeap is a heap of Values, ordered by score (see Less).
// Because the schedule is assembled back to front, the value popped
// first is the one that should appear last in its block.
type ValHeap struct {
	a     []*Value
	score []int8
}

func (h ValHeap) Len() int      { return len(h.a) }
func (h ValHeap) Swap(i, j int) { a := h.a; a[i], a[j] = a[j], a[i] }

func (h *ValHeap) Push(x interface{}) {
	// Push and Pop use pointer receivers because they modify the slice's length,
	// not just its contents.
	v := x.(*Value)
	h.a = append(h.a, v)
}
func (h *ValHeap) Pop() interface{} {
	old := h.a
	n := len(old)
	x := old[n-1]
	h.a = old[0 : n-1]
	return x
}
func (h ValHeap) Less(i, j int) bool {
	x := h.a[i]
	y := h.a[j]
	sx := h.score[x.ID]
	sy := h.score[y.ID]
	if c := sx - sy; c != 0 {
		return c > 0 // higher score comes later.
	}
	if x.Pos != y.Pos { // Favor in-order line stepping
		return x.Pos.After(y.Pos)
	}
	if x.Op != OpPhi {
		if c := len(x.Args) - len(y.Args); c != 0 {
			return c < 0 // smaller args come later
		}
	}
	return x.ID > y.ID
}

// Schedule the Values in each Block. After this phase returns, the
// order of b.Values matters and is the order in which those values
// will appear in the assembly output. For now it generates a
// reasonable valid schedule using a priority queue. TODO(khr):
// schedule smarter.
func schedule(f *Func) {
	// For each value, the number of times it is used in the block
	// by values that have not been scheduled yet.
	uses := make([]int32, f.NumValues())

	// reusable priority queue
	priq := new(ValHeap)

	// "priority" for a value
	score := make([]int8, f.NumValues())

	// scheduling order. We queue values in this list in reverse order.
	var order []*Value

	// maps mem values to the next live memory value
	nextMem := make([]*Value, f.NumValues())
	// additional pretend arguments for each Value. Used to enforce load/store ordering.
	additionalArgs := make([][]*Value, f.NumValues())

	for _, b := range f.Blocks {
		// Compute score. Larger numbers are scheduled closer to the end of the block.
		for _, v := range b.Values {
			switch {
			case v.Op == OpAMD64LoweredGetClosurePtr || v.Op == OpPPC64LoweredGetClosurePtr ||
				v.Op == OpARMLoweredGetClosurePtr || v.Op == OpARM64LoweredGetClosurePtr ||
				v.Op == Op386LoweredGetClosurePtr || v.Op == OpMIPS64LoweredGetClosurePtr ||
				v.Op == OpS390XLoweredGetClosurePtr || v.Op == OpMIPSLoweredGetClosurePtr:
				// We also score GetLoweredClosurePtr as early as possible to ensure that the
				// context register is not stomped. GetLoweredClosurePtr should only appear
				// in the entry block where there are no phi functions, so there is no
				// conflict or ambiguity here.
				if b != f.Entry {
					f.Fatalf("LoweredGetClosurePtr appeared outside of entry block, b=%s", b.String())
				}
				score[v.ID] = ScorePhi
			case v.Op == OpAMD64LoweredNilCheck || v.Op == OpPPC64LoweredNilCheck ||
				v.Op == OpARMLoweredNilCheck || v.Op == OpARM64LoweredNilCheck ||
				v.Op == Op386LoweredNilCheck || v.Op == OpMIPS64LoweredNilCheck ||
				v.Op == OpS390XLoweredNilCheck || v.Op == OpMIPSLoweredNilCheck:
				// Nil checks must come before loads from the same address.
				score[v.ID] = ScoreNilCheck
			case v.Op == OpPhi:
				// We want all the phis first.
				score[v.ID] = ScorePhi
			case v.Op == OpVarDef:
				// We want all the vardefs next.
				score[v.ID] = ScoreVarDef
			case v.Type.IsMemory():
				// Schedule stores as early as possible. This tends to
				// reduce register pressure. It also helps make sure
				// VARDEF ops are scheduled before the corresponding LEA.
				score[v.ID] = ScoreMemory
			case v.Op == OpSelect0 || v.Op == OpSelect1:
				// Schedule the pseudo-op of reading part of a tuple
				// immediately after the tuple-generating op, since
				// this value is already live. This also removes its
				// false dependency on the other part of the tuple.
				// Also ensures tuple is never spilled.
				score[v.ID] = ScoreReadTuple
			case v.Type.IsFlags() || v.Type.IsTuple():
				// Schedule flag register generation as late as possible.
				// This makes sure that we only have one live flags
				// value at a time.
				score[v.ID] = ScoreFlags
			default:
				score[v.ID] = ScoreDefault
			}
		}
	}

	for _, b := range f.Blocks {
		// Find store chain for block.
		// Store chains for different blocks overwrite each other, so
		// the calculated store chain is good only for this block.
		for _, v := range b.Values {
			if v.Op != OpPhi && v.Type.IsMemory() {
				for _, w := range v.Args {
					if w.Type.IsMemory() {
						nextMem[w.ID] = v
					}
				}
			}
		}

		// Compute uses.
		for _, v := range b.Values {
			if v.Op == OpPhi {
				// If a value is used by a phi, it does not induce
				// a scheduling edge because that use is from the
				// previous iteration.
				continue
			}
			for _, w := range v.Args {
				if w.Block == b {
					uses[w.ID]++
				}
				// Any load must come before the following store.
				if !v.Type.IsMemory() && w.Type.IsMemory() {
					// v is a load.
					s := nextMem[w.ID]
					if s == nil || s.Block != b {
						continue
					}
					additionalArgs[s.ID] = append(additionalArgs[s.ID], v)
					uses[v.ID]++
				}
			}
		}

		if b.Control != nil && b.Control.Op != OpPhi {
			// Force the control value to be scheduled at the end,
			// unless it is a phi value (which must be first).
			score[b.Control.ID] = ScoreControl

			// Schedule values dependent on the control value at the end.
			// This reduces the number of register spills. We don't find
			// all values that depend on the control, just values with a
			// direct dependency. This is cheaper and in testing there
			// was no difference in the number of spills.
			for _, v := range b.Values {
				if v.Op != OpPhi {
					for _, a := range v.Args {
						if a == b.Control {
							score[v.ID] = ScoreControl
						}
					}
				}
			}
		}

		// To put things into a priority queue, the values that
		// should come last are least.
		priq.score = score
		priq.a = priq.a[:0]

		// Initialize priority queue with schedulable values.
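		// A value becomes schedulable once every use of it inside this block
		// has been scheduled (uses[v.ID] == 0); since the schedule is built
		// back to front, the queue starts with the values that nothing else
		// in the block still needs.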
		for _, v := range b.Values {
			if uses[v.ID] == 0 {
				heap.Push(priq, v)
			}
		}

		// Schedule highest priority value, update use counts, repeat.
		order = order[:0]
		tuples := make(map[ID][]*Value)
		for {
			// Find highest priority schedulable value.
			// Note that schedule is assembled backwards.

			if priq.Len() == 0 {
				break
			}

			v := heap.Pop(priq).(*Value)

			// Add it to the schedule.
			// Do not emit tuple-reading ops until we're ready to emit the tuple-generating op.
			// TODO: maybe remove ReadTuple score above, if it does not help performance
			switch {
			case v.Op == OpSelect0:
				if tuples[v.Args[0].ID] == nil {
					tuples[v.Args[0].ID] = make([]*Value, 2)
				}
				tuples[v.Args[0].ID][0] = v
			case v.Op == OpSelect1:
				if tuples[v.Args[0].ID] == nil {
					tuples[v.Args[0].ID] = make([]*Value, 2)
				}
				tuples[v.Args[0].ID][1] = v
			case v.Type.IsTuple() && tuples[v.ID] != nil:
				if tuples[v.ID][1] != nil {
					order = append(order, tuples[v.ID][1])
				}
				if tuples[v.ID][0] != nil {
					order = append(order, tuples[v.ID][0])
				}
				delete(tuples, v.ID)
				fallthrough
			default:
				order = append(order, v)
			}

			// Update use counts of arguments.
			for _, w := range v.Args {
				if w.Block != b {
					continue
				}
				uses[w.ID]--
				if uses[w.ID] == 0 {
					// All uses scheduled, w is now schedulable.
					heap.Push(priq, w)
				}
			}
			for _, w := range additionalArgs[v.ID] {
				uses[w.ID]--
				if uses[w.ID] == 0 {
					// All uses scheduled, w is now schedulable.
					heap.Push(priq, w)
				}
			}
		}
		if len(order) != len(b.Values) {
			f.Fatalf("schedule does not include all values")
		}
		for i := 0; i < len(b.Values); i++ {
			b.Values[i] = order[len(b.Values)-1-i]
		}
	}

	f.scheduled = true
}

// storeOrder orders values with respect to stores. That is,
// if v transitively depends on store s, v is ordered after s,
// otherwise v is ordered before s.
// Specifically, values are ordered like
// store1
// NilCheck that depends on store1
// other values that depend on store1
// store2
// NilCheck that depends on store2
// other values that depend on store2
// ...
// The order of non-store and non-NilCheck values is undefined
// (not necessarily dependency order). This should be cheaper
// than a full scheduling as done above.
// Note that simple dependency order won't work: there is no
// dependency between NilChecks and values like IsNonNil.
// Auxiliary data structures are passed in as arguments, so
// that they can be allocated in the caller and be reused.
// This function takes care of resetting them.
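// For example, a load whose memory argument is store1 is placed after store1
// (and after any NilCheck that depends on store1) but before store2.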
func storeOrder(values []*Value, sset *sparseSet, storeNumber []int32) []*Value {
	if len(values) == 0 {
		return values
	}

	f := values[0].Block.Func

	// find all stores
	var stores []*Value // members of values that are store values
	hasNilCheck := false
	sset.clear() // sset is the set of stores that are used in other values
	for _, v := range values {
		if v.Type.IsMemory() {
			stores = append(stores, v)
			if v.Op == OpInitMem || v.Op == OpPhi {
				continue
			}
			sset.add(v.MemoryArg().ID) // record that v's memory arg is used
		}
		if v.Op == OpNilCheck {
			hasNilCheck = true
		}
	}
	if len(stores) == 0 || !hasNilCheck && f.pass.name == "nilcheckelim" {
		// there is no store, the order does not matter
		return values
	}

	// find last store, which is the one that is not used by other stores
	var last *Value
	for _, v := range stores {
		if !sset.contains(v.ID) {
			if last != nil {
				f.Fatalf("two stores live simultaneously: %v and %v", v, last)
			}
			last = v
		}
	}

	// We assign a store number to each value. Store number is the
	// index of the latest store that this value transitively depends on.
	// The i-th store in the current block gets store number 3*i. A nil
	// check that depends on the i-th store gets store number 3*i+1.
	// Other values that depend on the i-th store get store number 3*i+2.
	// Special case: 0 -- unassigned, 1 or 2 -- the latest store it depends on
	// is in the previous block (or there is no store at all, e.g. the value is a Const).
	// First we assign the number to all stores by walking back the store chain,
	// then assign the number to other values in DFS order.
	count := make([]int32, 3*(len(stores)+1))
	sset.clear() // reuse sparse set to ensure that a value is pushed to stack only once
	for n, w := len(stores), last; n > 0; n-- {
		storeNumber[w.ID] = int32(3 * n)
		count[3*n]++
		sset.add(w.ID)
		if w.Op == OpInitMem || w.Op == OpPhi {
			if n != 1 {
				f.Fatalf("store order is wrong: there are stores before %v", w)
			}
			break
		}
		w = w.MemoryArg()
	}
	var stack []*Value
	for _, v := range values {
		if sset.contains(v.ID) {
			// in sset means v is a store, or already pushed to stack, or already assigned a store number
			continue
		}
		stack = append(stack, v)
		sset.add(v.ID)

		for len(stack) > 0 {
			w := stack[len(stack)-1]
			if storeNumber[w.ID] != 0 {
				stack = stack[:len(stack)-1]
				continue
			}
			if w.Op == OpPhi {
				// Phi value doesn't depend on store in the current block.
				// Do this early to avoid dependency cycle.
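				// This puts the phi in bin 2 (3*0+2): an "other value"
				// with no store dependency in this block.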
				storeNumber[w.ID] = 2
				count[2]++
				stack = stack[:len(stack)-1]
				continue
			}

			max := int32(0) // latest store dependency
			argsdone := true
			for _, a := range w.Args {
				if a.Block != w.Block {
					continue
				}
				if !sset.contains(a.ID) {
					stack = append(stack, a)
					sset.add(a.ID)
					argsdone = false
					break
				}
				if storeNumber[a.ID]/3 > max {
					max = storeNumber[a.ID] / 3
				}
			}
			if !argsdone {
				continue
			}

			n := 3*max + 2
			if w.Op == OpNilCheck {
				n = 3*max + 1
			}
			storeNumber[w.ID] = n
			count[n]++
			stack = stack[:len(stack)-1]
		}
	}

	// convert count to prefix sum of counts: count'[i] = sum_{j<=i} count[j]
	for i := range count {
		if i == 0 {
			continue
		}
		count[i] += count[i-1]
	}
	if count[len(count)-1] != int32(len(values)) {
		f.Fatalf("storeOrder: value is missing, total count = %d, values = %v", count[len(count)-1], values)
	}

	// place values in count-indexed bins, which are in the desired store order
	order := make([]*Value, len(values))
	for _, v := range values {
		s := storeNumber[v.ID]
		order[count[s-1]] = v
		count[s-1]++
	}

	return order
}