github.com/hikaru7719/go@v0.0.0-20181025140707-c8b2ac68906a/src/cmd/compile/internal/ssa/schedule.go

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import "container/heap"

const (
	ScorePhi = iota // towards top of block
	ScoreArg
	ScoreNilCheck
	ScoreReadTuple
	ScoreVarDef
	ScoreMemory
	ScoreDefault
	ScoreFlags
	ScoreControl // towards bottom of block
)

type ValHeap struct {
	a     []*Value
	score []int8
}

func (h ValHeap) Len() int      { return len(h.a) }
func (h ValHeap) Swap(i, j int) { a := h.a; a[i], a[j] = a[j], a[i] }

func (h *ValHeap) Push(x interface{}) {
	// Push and Pop use pointer receivers because they modify the slice's length,
	// not just its contents.
	v := x.(*Value)
	h.a = append(h.a, v)
}
func (h *ValHeap) Pop() interface{} {
	old := h.a
	n := len(old)
	x := old[n-1]
	h.a = old[0 : n-1]
	return x
}
func (h ValHeap) Less(i, j int) bool {
	x := h.a[i]
	y := h.a[j]
	sx := h.score[x.ID]
	sy := h.score[y.ID]
	if c := sx - sy; c != 0 {
		return c > 0 // higher score comes later.
	}
	if x.Pos != y.Pos { // Favor in-order line stepping
		return x.Pos.After(y.Pos)
	}
	if x.Op != OpPhi {
		if c := len(x.Args) - len(y.Args); c != 0 {
			return c < 0 // fewer args come later
		}
	}
	return x.ID > y.ID
}

// Schedule the Values in each Block. After this phase returns, the
// order of b.Values matters and is the order in which those values
// will appear in the assembly output. For now it generates a
// reasonable valid schedule using a priority queue. TODO(khr):
// schedule smarter.
func schedule(f *Func) {
	// For each value, the number of times it is used in the block
	// by values that have not been scheduled yet.
	uses := make([]int32, f.NumValues())

	// reusable priority queue
	priq := new(ValHeap)

	// "priority" for a value
	score := make([]int8, f.NumValues())

	// scheduling order. We queue values in this list in reverse order.
	// A constant bound allows this to be stack-allocated. 64 is
	// enough to cover almost every schedule call.
	order := make([]*Value, 0, 64)

	// maps mem values to the next live memory value
	nextMem := make([]*Value, f.NumValues())
	// additional pretend arguments for each Value. Used to enforce load/store ordering.
	additionalArgs := make([][]*Value, f.NumValues())

	for _, b := range f.Blocks {
		// Compute score. Larger numbers are scheduled closer to the end of the block.
		for _, v := range b.Values {
			switch {
			case v.Op == OpAMD64LoweredGetClosurePtr || v.Op == OpPPC64LoweredGetClosurePtr ||
				v.Op == OpARMLoweredGetClosurePtr || v.Op == OpARM64LoweredGetClosurePtr ||
				v.Op == Op386LoweredGetClosurePtr || v.Op == OpMIPS64LoweredGetClosurePtr ||
				v.Op == OpS390XLoweredGetClosurePtr || v.Op == OpMIPSLoweredGetClosurePtr ||
				v.Op == OpWasmLoweredGetClosurePtr:
				// We also score GetLoweredClosurePtr as early as possible to ensure that the
				// context register is not stomped. GetLoweredClosurePtr should only appear
				// in the entry block where there are no phi functions, so there is no
				// conflict or ambiguity here.
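				// Reusing ScorePhi (the lowest score) keeps the closure pointer
				// at the very top of the entry block.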
				if b != f.Entry {
					f.Fatalf("LoweredGetClosurePtr appeared outside of entry block, b=%s", b.String())
				}
				score[v.ID] = ScorePhi
			case v.Op == OpAMD64LoweredNilCheck || v.Op == OpPPC64LoweredNilCheck ||
				v.Op == OpARMLoweredNilCheck || v.Op == OpARM64LoweredNilCheck ||
				v.Op == Op386LoweredNilCheck || v.Op == OpMIPS64LoweredNilCheck ||
				v.Op == OpS390XLoweredNilCheck || v.Op == OpMIPSLoweredNilCheck ||
				v.Op == OpWasmLoweredNilCheck:
				// Nil checks must come before loads from the same address.
				score[v.ID] = ScoreNilCheck
			case v.Op == OpPhi:
				// We want all the phis first.
				score[v.ID] = ScorePhi
			case v.Op == OpVarDef:
				// We want all the vardefs next.
				score[v.ID] = ScoreVarDef
			case v.Op == OpArg:
				// We want all the args as early as possible, for better debugging.
				score[v.ID] = ScoreArg
			case v.Type.IsMemory():
				// Schedule stores as early as possible. This tends to
				// reduce register pressure. It also helps make sure
				// VARDEF ops are scheduled before the corresponding LEA.
				score[v.ID] = ScoreMemory
			case v.Op == OpSelect0 || v.Op == OpSelect1:
				// Schedule the pseudo-op of reading part of a tuple
				// immediately after the tuple-generating op, since
				// this value is already live. This also removes its
				// false dependency on the other part of the tuple.
				// Also ensures tuple is never spilled.
				score[v.ID] = ScoreReadTuple
			case v.Type.IsFlags() || v.Type.IsTuple():
				// Schedule flag register generation as late as possible.
				// This makes sure that we only have one live flags
				// value at a time.
				score[v.ID] = ScoreFlags
			default:
				score[v.ID] = ScoreDefault
			}
		}
	}

	for _, b := range f.Blocks {
		// Find store chain for block.
		// Store chains for different blocks overwrite each other, so
		// the calculated store chain is good only for this block.
		for _, v := range b.Values {
			if v.Op != OpPhi && v.Type.IsMemory() {
				for _, w := range v.Args {
					if w.Type.IsMemory() {
						nextMem[w.ID] = v
					}
				}
			}
		}

		// Compute uses.
		for _, v := range b.Values {
			if v.Op == OpPhi {
				// If a value is used by a phi, it does not induce
				// a scheduling edge because that use is from the
				// previous iteration.
				continue
			}
			for _, w := range v.Args {
				if w.Block == b {
					uses[w.ID]++
				}
				// Any load must come before the following store.
				if !v.Type.IsMemory() && w.Type.IsMemory() {
					// v is a load.
					s := nextMem[w.ID]
					if s == nil || s.Block != b {
						continue
					}
					additionalArgs[s.ID] = append(additionalArgs[s.ID], v)
					uses[v.ID]++
				}
			}
		}

		if b.Control != nil && b.Control.Op != OpPhi {
			// Force the control value to be scheduled at the end,
			// unless it is a phi value (which must be first).
			score[b.Control.ID] = ScoreControl

			// Schedule values dependent on the control value at the end.
			// This reduces the number of register spills. We don't find
			// all values that depend on the control, just values with a
			// direct dependency. This is cheaper and in testing there
			// was no difference in the number of spills.
			for _, v := range b.Values {
				if v.Op != OpPhi {
					for _, a := range v.Args {
						if a == b.Control {
							score[v.ID] = ScoreControl
						}
					}
				}
			}
		}

		// To put things into a priority queue
		// The values that should come last are least.
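		// For example, a value scored ScoreControl pops before one scored
		// ScorePhi; since the schedule is assembled backwards and reversed
		// into b.Values below, the control value lands at the bottom of the
		// block and the phis at the top.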
		priq.score = score
		priq.a = priq.a[:0]

		// Initialize priority queue with schedulable values.
		for _, v := range b.Values {
			if uses[v.ID] == 0 {
				heap.Push(priq, v)
			}
		}

		// Schedule highest priority value, update use counts, repeat.
		order = order[:0]
		tuples := make(map[ID][]*Value)
		for {
			// Find highest priority schedulable value.
			// Note that schedule is assembled backwards.

			if priq.Len() == 0 {
				break
			}

			v := heap.Pop(priq).(*Value)

			// Add it to the schedule.
			// Do not emit tuple-reading ops until we're ready to emit the tuple-generating op.
			//TODO: maybe remove ReadTuple score above, if it does not help on performance
			switch {
			case v.Op == OpSelect0:
				if tuples[v.Args[0].ID] == nil {
					tuples[v.Args[0].ID] = make([]*Value, 2)
				}
				tuples[v.Args[0].ID][0] = v
			case v.Op == OpSelect1:
				if tuples[v.Args[0].ID] == nil {
					tuples[v.Args[0].ID] = make([]*Value, 2)
				}
				tuples[v.Args[0].ID][1] = v
			case v.Type.IsTuple() && tuples[v.ID] != nil:
				if tuples[v.ID][1] != nil {
					order = append(order, tuples[v.ID][1])
				}
				if tuples[v.ID][0] != nil {
					order = append(order, tuples[v.ID][0])
				}
				delete(tuples, v.ID)
				fallthrough
			default:
				order = append(order, v)
			}

			// Update use counts of arguments.
			for _, w := range v.Args {
				if w.Block != b {
					continue
				}
				uses[w.ID]--
				if uses[w.ID] == 0 {
					// All uses scheduled, w is now schedulable.
					heap.Push(priq, w)
				}
			}
			for _, w := range additionalArgs[v.ID] {
				uses[w.ID]--
				if uses[w.ID] == 0 {
					// All uses scheduled, w is now schedulable.
					heap.Push(priq, w)
				}
			}
		}
		if len(order) != len(b.Values) {
			f.Fatalf("schedule does not include all values in block %s", b)
		}
		for i := 0; i < len(b.Values); i++ {
			b.Values[i] = order[len(b.Values)-1-i]
		}
	}

	f.scheduled = true
}

// storeOrder orders values with respect to stores. That is,
// if v transitively depends on store s, v is ordered after s,
// otherwise v is ordered before s.
// Specifically, values are ordered like
// store1
// NilCheck that depends on store1
// other values that depend on store1
// store2
// NilCheck that depends on store2
// other values that depend on store2
// ...
// The order of non-store and non-NilCheck values is undefined
// (not necessarily dependency order). This should be cheaper
// than a full scheduling as done above.
// Note that simple dependency order won't work: there is no
// dependency between NilChecks and values like IsNonNil.
// Auxiliary data structures are passed in as arguments, so
// that they can be allocated in the caller and be reused.
// This function takes care of resetting them.
func storeOrder(values []*Value, sset *sparseSet, storeNumber []int32) []*Value {
	if len(values) == 0 {
		return values
	}

	f := values[0].Block.Func

	// find all stores

	// Members of values that are store values.
	// A constant bound allows this to be stack-allocated. 64 is
	// enough to cover almost every storeOrder call.
	stores := make([]*Value, 0, 64)
	hasNilCheck := false
	sset.clear() // sset is the set of stores that are used in other values
	for _, v := range values {
		if v.Type.IsMemory() {
			stores = append(stores, v)
			if v.Op == OpInitMem || v.Op == OpPhi {
				continue
			}
			sset.add(v.MemoryArg().ID) // record that v's memory arg is used
		}
		if v.Op == OpNilCheck {
			hasNilCheck = true
		}
	}
	if len(stores) == 0 || !hasNilCheck && f.pass.name == "nilcheckelim" {
		// there is no store, the order does not matter
		return values
	}

	// find last store, which is the one that is not used by other stores
	var last *Value
	for _, v := range stores {
		if !sset.contains(v.ID) {
			if last != nil {
				f.Fatalf("two stores live simultaneously: %v and %v", v, last)
			}
			last = v
		}
	}

	// We assign a store number to each value. Store number is the
	// index of the latest store that this value transitively depends on.
	// The i-th store in the current block gets store number 3*i. A nil
	// check that depends on the i-th store gets store number 3*i+1.
	// Other values that depend on the i-th store get store number 3*i+2.
	// Special case: 0 -- unassigned, 1 or 2 -- the latest store it depends on
	// is in the previous block (or there is no store at all, e.g. the value is a Const).
	// First we assign the number to all stores by walking back the store chain,
	// then assign the number to other values in DFS order.
	count := make([]int32, 3*(len(stores)+1))
	sset.clear() // reuse sparse set to ensure that a value is pushed to stack only once
	for n, w := len(stores), last; n > 0; n-- {
		storeNumber[w.ID] = int32(3 * n)
		count[3*n]++
		sset.add(w.ID)
		if w.Op == OpInitMem || w.Op == OpPhi {
			if n != 1 {
				f.Fatalf("store order is wrong: there are stores before %v", w)
			}
			break
		}
		w = w.MemoryArg()
	}
	var stack []*Value
	for _, v := range values {
		if sset.contains(v.ID) {
			// in sset means v is a store, or already pushed to stack, or already assigned a store number
			continue
		}
		stack = append(stack, v)
		sset.add(v.ID)

		for len(stack) > 0 {
			w := stack[len(stack)-1]
			if storeNumber[w.ID] != 0 {
				stack = stack[:len(stack)-1]
				continue
			}
			if w.Op == OpPhi {
				// Phi value doesn't depend on store in the current block.
				// Do this early to avoid dependency cycle.
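				// 2 is the "other value" bucket for values with no store
				// dependency in this block (3*0+2).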
				storeNumber[w.ID] = 2
				count[2]++
				stack = stack[:len(stack)-1]
				continue
			}

			max := int32(0) // latest store dependency
			argsdone := true
			for _, a := range w.Args {
				if a.Block != w.Block {
					continue
				}
				if !sset.contains(a.ID) {
					stack = append(stack, a)
					sset.add(a.ID)
					argsdone = false
					break
				}
				if storeNumber[a.ID]/3 > max {
					max = storeNumber[a.ID] / 3
				}
			}
			if !argsdone {
				continue
			}

			n := 3*max + 2
			if w.Op == OpNilCheck {
				n = 3*max + 1
			}
			storeNumber[w.ID] = n
			count[n]++
			stack = stack[:len(stack)-1]
		}
	}

	// convert count to prefix sum of counts: count'[i] = sum_{j<=i} count[j]
	for i := range count {
		if i == 0 {
			continue
		}
		count[i] += count[i-1]
	}
	if count[len(count)-1] != int32(len(values)) {
		f.Fatalf("storeOrder: value is missing, total count = %d, values = %v", count[len(count)-1], values)
	}

	// place values in count-indexed bins, which are in the desired store order
	order := make([]*Value, len(values))
	for _, v := range values {
		s := storeNumber[v.ID]
		order[count[s-1]] = v
		count[s-1]++
	}

	return order
}
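The placement loop at the end of storeOrder is a stable counting sort keyed by store number: after the prefix-sum pass, count[s-1] is the number of values whose store number is strictly less than s, so it is exactly the index where the next value with store number s belongs. Below is a minimal standalone sketch of the same pattern on plain ints; the names countingPlace and keys are illustrative only and are not part of the compiler.

package main

import "fmt"

// countingPlace mirrors the tail of storeOrder: keys[i] >= 1 plays the role
// of storeNumber for element i. The result lists element indices grouped by
// key, keeping input order within a key (a stable counting sort).
func countingPlace(keys []int, maxKey int) []int {
	count := make([]int, maxKey+1)
	for _, k := range keys {
		count[k]++
	}
	// prefix sum: count[i] = number of elements with key <= i
	for i := 1; i < len(count); i++ {
		count[i] += count[i-1]
	}
	order := make([]int, len(keys))
	for i, k := range keys {
		order[count[k-1]] = i // everything with a smaller key comes first
		count[k-1]++          // the next element with the same key goes one slot later
	}
	return order
}

func main() {
	// Keys follow the storeOrder scheme: 3 = store #1, 4 = a nil check on it,
	// 5 = other values depending on it, 6 = store #2, 8 = a value depending on store #2.
	keys := []int{5, 3, 6, 5, 8, 4}
	fmt.Println(countingPlace(keys, 8)) // [1 5 0 3 2 4]
}

Running the sketch groups the two key-5 elements after store #1 and its nil check, in input order, then store #2 and its dependent, which is the same grouping storeOrder produces for stores, their nil checks, and their other dependents.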