github.com/slayercat/go@v0.0.0-20170428012452-c51559813f61/src/cmd/compile/internal/ssa/schedule.go

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import "container/heap"

const (
	ScorePhi = iota // towards top of block
	ScoreNilCheck
	ScoreReadTuple
	ScoreVarDef
	ScoreMemory
	ScoreDefault
	ScoreFlags
	ScoreControl // towards bottom of block
)

type ValHeap struct {
	a     []*Value
	score []int8
}

func (h ValHeap) Len() int      { return len(h.a) }
func (h ValHeap) Swap(i, j int) { a := h.a; a[i], a[j] = a[j], a[i] }

func (h *ValHeap) Push(x interface{}) {
	// Push and Pop use pointer receivers because they modify the slice's length,
	// not just its contents.
	v := x.(*Value)
	h.a = append(h.a, v)
}
func (h *ValHeap) Pop() interface{} {
	old := h.a
	n := len(old)
	x := old[n-1]
	h.a = old[0 : n-1]
	return x
}
func (h ValHeap) Less(i, j int) bool {
	x := h.a[i]
	y := h.a[j]
	sx := h.score[x.ID]
	sy := h.score[y.ID]
	if c := sx - sy; c != 0 {
		return c > 0 // higher score comes later.
	}
	if x.Pos != y.Pos { // Favor in-order line stepping
		return x.Pos.After(y.Pos)
	}
	if x.Op != OpPhi {
		if c := len(x.Args) - len(y.Args); c != 0 {
			return c < 0 // fewer args come later
		}
	}
	return x.ID > y.ID
}
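
// valHeapOrderSketch is a minimal illustrative sketch, not used by the
// compiler, of the ordering implemented by ValHeap.Less above: the value
// with the higher score pops first, and because schedule assembles each
// block in reverse, it therefore lands later in the block. The two Values
// and their scores are hypothetical, constructed only for this example.
func valHeapOrderSketch() []*Value {
	phi := &Value{ID: 1}   // pretend this is a phi
	flags := &Value{ID: 2} // pretend this produces flags
	h := &ValHeap{score: []int8{0, ScorePhi, ScoreFlags}} // indexed by Value.ID
	heap.Push(h, phi)
	heap.Push(h, flags)
	return []*Value{
		heap.Pop(h).(*Value), // flags: higher score, pops first, ends up last in the block
		heap.Pop(h).(*Value), // phi: lower score, pops second, ends up first in the block
	}
}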

// Schedule the Values in each Block. After this phase returns, the
// order of b.Values matters and is the order in which those values
// will appear in the assembly output. For now it generates a
// reasonable valid schedule using a priority queue. TODO(khr):
// schedule smarter.
func schedule(f *Func) {
	// For each value, the number of times it is used in the block
	// by values that have not been scheduled yet.
	uses := make([]int32, f.NumValues())

	// reusable priority queue
	priq := new(ValHeap)

	// "priority" for a value
	score := make([]int8, f.NumValues())

	// scheduling order. We queue values in this list in reverse order.
	var order []*Value

	// maps mem values to the next live memory value
	nextMem := make([]*Value, f.NumValues())
	// additional pretend arguments for each Value. Used to enforce load/store ordering.
	additionalArgs := make([][]*Value, f.NumValues())

	for _, b := range f.Blocks {
		// Compute score. Larger numbers are scheduled closer to the end of the block.
		for _, v := range b.Values {
			switch {
			case v.Op == OpAMD64LoweredGetClosurePtr || v.Op == OpPPC64LoweredGetClosurePtr ||
				v.Op == OpARMLoweredGetClosurePtr || v.Op == OpARM64LoweredGetClosurePtr ||
				v.Op == Op386LoweredGetClosurePtr || v.Op == OpMIPS64LoweredGetClosurePtr ||
				v.Op == OpS390XLoweredGetClosurePtr || v.Op == OpMIPSLoweredGetClosurePtr:
				// We also score LoweredGetClosurePtr as early as possible to ensure that the
				// context register is not stomped. LoweredGetClosurePtr should only appear
				// in the entry block where there are no phi functions, so there is no
				// conflict or ambiguity here.
				if b != f.Entry {
					f.Fatalf("LoweredGetClosurePtr appeared outside of entry block, b=%s", b.String())
				}
				score[v.ID] = ScorePhi
			case v.Op == OpAMD64LoweredNilCheck || v.Op == OpPPC64LoweredNilCheck ||
				v.Op == OpARMLoweredNilCheck || v.Op == OpARM64LoweredNilCheck ||
				v.Op == Op386LoweredNilCheck || v.Op == OpMIPS64LoweredNilCheck ||
				v.Op == OpS390XLoweredNilCheck || v.Op == OpMIPSLoweredNilCheck:
				// Nil checks must come before loads from the same address.
				score[v.ID] = ScoreNilCheck
			case v.Op == OpPhi:
				// We want all the phis first.
				score[v.ID] = ScorePhi
			case v.Op == OpVarDef:
				// We want all the vardefs next.
				score[v.ID] = ScoreVarDef
			case v.Type.IsMemory():
				// Schedule stores as early as possible. This tends to
				// reduce register pressure. It also helps make sure
				// VARDEF ops are scheduled before the corresponding LEA.
				score[v.ID] = ScoreMemory
			case v.Op == OpSelect0 || v.Op == OpSelect1:
				// Schedule the pseudo-op of reading part of a tuple
				// immediately after the tuple-generating op, since
				// this value is already live. This also removes its
				// false dependency on the other part of the tuple.
				// Also ensures tuple is never spilled.
				score[v.ID] = ScoreReadTuple
			case v.Type.IsFlags() || v.Type.IsTuple():
				// Schedule flag register generation as late as possible.
				// This makes sure that we only have one live flags
				// value at a time.
				score[v.ID] = ScoreFlags
			default:
				score[v.ID] = ScoreDefault
			}
		}
	}

	for _, b := range f.Blocks {
		// Find store chain for block.
		// Store chains for different blocks overwrite each other, so
		// the calculated store chain is good only for this block.
		for _, v := range b.Values {
			if v.Op != OpPhi && v.Type.IsMemory() {
				mem := v
				if v.Op == OpSelect1 {
					v = v.Args[0]
				}
				for _, w := range v.Args {
					if w.Type.IsMemory() {
						nextMem[w.ID] = mem
					}
				}
			}
		}
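
		// For example, given a memory chain m0 -> store1 -> store2 within
		// this block, the loop above records nextMem[m0.ID] = store1 and
		// nextMem[store1.ID] = store2. The compute-uses pass below relies
		// on this to pin each load of m0 before store1.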
		// Compute uses.
		for _, v := range b.Values {
			if v.Op == OpPhi {
				// If a value is used by a phi, it does not induce
				// a scheduling edge because that use is from the
				// previous iteration.
				continue
			}
			for _, w := range v.Args {
				if w.Block == b {
					uses[w.ID]++
				}
				// Any load must come before the following store.
				if v.Type.IsMemory() || !w.Type.IsMemory() {
					continue // not a load
				}
				s := nextMem[w.ID]
				if s == nil || s.Block != b {
					continue
				}
				additionalArgs[s.ID] = append(additionalArgs[s.ID], v)
				uses[v.ID]++
			}
		}

		if b.Control != nil && b.Control.Op != OpPhi {
			// Force the control value to be scheduled at the end,
			// unless it is a phi value (which must be first).
			score[b.Control.ID] = ScoreControl

			// Schedule values dependent on the control value at the end.
			// This reduces the number of register spills. We don't find
			// all values that depend on the control, just values with a
			// direct dependency. This is cheaper and in testing there
			// was no difference in the number of spills.
			for _, v := range b.Values {
				if v.Op != OpPhi {
					for _, a := range v.Args {
						if a == b.Control {
							score[v.ID] = ScoreControl
						}
					}
				}
			}
		}

		// To put things into the priority queue: values that should come
		// last in the block sort first, since the schedule is assembled
		// backwards.
		priq.score = score
		priq.a = priq.a[:0]

		// Initialize priority queue with schedulable values.
		for _, v := range b.Values {
			if uses[v.ID] == 0 {
				heap.Push(priq, v)
			}
		}

		// Schedule highest priority value, update use counts, repeat.
		order = order[:0]
		tuples := make(map[ID][]*Value)
		for {
			// Find highest priority schedulable value.
			// Note that schedule is assembled backwards.

			if priq.Len() == 0 {
				break
			}

			v := heap.Pop(priq).(*Value)

			// Add it to the schedule.
			// Do not emit tuple-reading ops until we're ready to emit the tuple-generating op.
			// TODO: maybe remove the ReadTuple score above, if it does not help performance.
			switch {
			case v.Op == OpSelect0:
				if tuples[v.Args[0].ID] == nil {
					tuples[v.Args[0].ID] = make([]*Value, 2)
				}
				tuples[v.Args[0].ID][0] = v
			case v.Op == OpSelect1:
				if tuples[v.Args[0].ID] == nil {
					tuples[v.Args[0].ID] = make([]*Value, 2)
				}
				tuples[v.Args[0].ID][1] = v
			case v.Type.IsTuple() && tuples[v.ID] != nil:
				if tuples[v.ID][1] != nil {
					order = append(order, tuples[v.ID][1])
				}
				if tuples[v.ID][0] != nil {
					order = append(order, tuples[v.ID][0])
				}
				delete(tuples, v.ID)
				fallthrough
			default:
				order = append(order, v)
			}

			// Update use counts of arguments.
			for _, w := range v.Args {
				if w.Block != b {
					continue
				}
				uses[w.ID]--
				if uses[w.ID] == 0 {
					// All uses scheduled, w is now schedulable.
					heap.Push(priq, w)
				}
			}
			for _, w := range additionalArgs[v.ID] {
				uses[w.ID]--
				if uses[w.ID] == 0 {
					// All uses scheduled, w is now schedulable.
					heap.Push(priq, w)
				}
			}
		}
		if len(order) != len(b.Values) {
			f.Fatalf("schedule does not include all values")
		}
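		// order was assembled backwards: the value popped first belongs
		// at the end of the block, so copy it into b.Values in reverse.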
		for i := 0; i < len(b.Values); i++ {
			b.Values[i] = order[len(b.Values)-1-i]
		}
	}

	f.scheduled = true
}

// storeOrder orders values with respect to stores. That is,
// if v transitively depends on store s, v is ordered after s,
// otherwise v is ordered before s.
// Specifically, values are ordered like
// store1
// NilCheck that depends on store1
// other values that depend on store1
// store2
// NilCheck that depends on store2
// other values that depend on store2
// ...
// The order of non-store and non-NilCheck values is undefined
// (not necessarily dependency order). This should be cheaper
// than the full scheduling done above.
// Note that simple dependency order won't work: there is no
// dependency between NilChecks and values like IsNonNil.
// Auxiliary data structures are passed in as arguments, so
// that they can be allocated in the caller and reused.
// This function takes care of resetting them.
func storeOrder(values []*Value, sset *sparseSet, storeNumber []int32) []*Value {
	if len(values) == 0 {
		return values
	}

	f := values[0].Block.Func

	// find all stores
	var stores []*Value // members of values that are store values
	hasNilCheck := false
	sset.clear() // sset is the set of stores that are used in other values
	for _, v := range values {
		if v.Type.IsMemory() {
			stores = append(stores, v)
			if v.Op == OpInitMem || v.Op == OpPhi {
				continue
			}
			a := v
			if v.Op == OpSelect1 {
				a = a.Args[0]
			}
			sset.add(a.MemoryArg().ID) // record that v's memory arg is used
		}
		if v.Op == OpNilCheck {
			hasNilCheck = true
		}
	}
	if len(stores) == 0 || !hasNilCheck && f.pass.name == "nilcheckelim" {
		// there is no store, the order does not matter
		return values
	}

	// find the last store, which is the one that is not used by other stores
	var last *Value
	for _, v := range stores {
		if !sset.contains(v.ID) {
			if last != nil {
				f.Fatalf("two stores live simultaneously: %v and %v", v, last)
			}
			last = v
		}
	}

	// We assign a store number to each value. The store number is the
	// index of the latest store that this value transitively depends on.
	// The i-th store in the current block gets store number 3*i. A nil
	// check that depends on the i-th store gets store number 3*i+1.
	// Other values that depend on the i-th store get store number 3*i+2.
	// Special case: 0 -- unassigned, 1 or 2 -- the latest store it depends
	// on is in a previous block (or there is no store at all, e.g. the
	// value is a Const).
	// First we assign the number to all stores by walking back the store chain,
	// then assign the number to other values in DFS order.
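	// As a hypothetical example, for a block containing
	//   store1, NilCheck(store1), load(store1), store2, Const
	// the store numbers assigned below are 3, 4, 5, 6, and 2
	// respectively (the Const depends on no store in this block).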
	count := make([]int32, 3*(len(stores)+1))
	sset.clear() // reuse sparse set to ensure that a value is pushed to stack only once
	for n, w := len(stores), last; n > 0; n-- {
		storeNumber[w.ID] = int32(3 * n)
		count[3*n]++
		sset.add(w.ID)
		if w.Op == OpInitMem || w.Op == OpPhi {
			if n != 1 {
				f.Fatalf("store order is wrong: there are stores before %v", w)
			}
			break
		}
		if w.Op == OpSelect1 {
			w = w.Args[0]
		}
		w = w.MemoryArg()
	}
	var stack []*Value
	for _, v := range values {
		if sset.contains(v.ID) {
			// in sset means v is a store, or already pushed to stack, or already assigned a store number
			continue
		}
		stack = append(stack, v)
		sset.add(v.ID)

		for len(stack) > 0 {
			w := stack[len(stack)-1]
			if storeNumber[w.ID] != 0 {
				stack = stack[:len(stack)-1]
				continue
			}
			if w.Op == OpPhi {
				// Phi value doesn't depend on store in the current block.
				// Do this early to avoid dependency cycle.
				storeNumber[w.ID] = 2
				count[2]++
				stack = stack[:len(stack)-1]
				continue
			}

			max := int32(0) // latest store dependency
			argsdone := true
			for _, a := range w.Args {
				if a.Block != w.Block {
					continue
				}
				if !sset.contains(a.ID) {
					stack = append(stack, a)
					sset.add(a.ID)
					argsdone = false
					break
				}
				if storeNumber[a.ID]/3 > max {
					max = storeNumber[a.ID] / 3
				}
			}
			if !argsdone {
				continue
			}

			n := 3*max + 2
			if w.Op == OpNilCheck {
				n = 3*max + 1
			}
			storeNumber[w.ID] = n
			count[n]++
			stack = stack[:len(stack)-1]
		}
	}

	// convert count to a prefix sum of counts: count'[i] = sum_{j<=i} count[j]
	for i := range count {
		if i == 0 {
			continue
		}
		count[i] += count[i-1]
	}
	if count[len(count)-1] != int32(len(values)) {
		f.Fatalf("storeOrder: value is missing, total count = %d, values = %v", count[len(count)-1], values)
	}

	// place values in count-indexed bins, which are in the desired store order
	order := make([]*Value, len(values))
	for _, v := range values {
		s := storeNumber[v.ID]
		order[count[s-1]] = v
		count[s-1]++
	}

	return order
}
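
// The bin placement at the end of storeOrder is a counting sort keyed by
// store number. countingSortSketch is a standalone, hypothetical sketch of
// the same technique (it is not used by the compiler): given one key per
// element, it returns the element indices grouped by ascending key.
func countingSortSketch(keys []int32, maxKey int32) []int {
	// After the prefix-sum pass, count[k] holds the first output slot
	// for elements with key k.
	count := make([]int32, maxKey+2)
	for _, k := range keys {
		count[k+1]++
	}
	for i := 1; i < len(count); i++ {
		count[i] += count[i-1]
	}
	order := make([]int, len(keys))
	for i, k := range keys {
		order[count[k]] = i
		count[k]++
	}
	return order
}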