// Source: github.com/go-asm/go@v1.21.1-0.20240213172139-40c5ead50c48/cmd/compile/ssa/cse.go

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import (
	"fmt"
	"sort"

	"github.com/go-asm/go/cmd/compile/types"
	"github.com/go-asm/go/cmd/src"
)

// cse does common-subexpression elimination on the Function.
// Values are just relinked, nothing is deleted. A subsequent deadcode
// pass is required to actually remove duplicate expressions.
//
// The pass runs in four phases:
//  1. Build a coarse partition of the non-memory values (partitionValues).
//  2. Refine the partition by repeatedly splitting classes whose members
//     have arguments in different classes, until nothing changes.
//  3. Within each remaining class, pick replacements: v replaces w when
//     v's block dominates (or is) w's block.
//  4. Rewrite value arguments and block control values accordingly.
//
// Side effects visible in this function: f.auxmap is created if nil and
// extended, the first two arguments of commutative values may be swapped,
// and statement markers on v.Pos/w.Pos may be moved during rewriting.
func cse(f *Func) {
	// Two values are equivalent if they satisfy the following definition:
	// equivalent(v, w):
	//   v.op == w.op
	//   v.type == w.type
	//   v.aux == w.aux
	//   v.auxint == w.auxint
	//   len(v.args) == len(w.args)
	//   v.block == w.block if v.op == OpPhi
	//   equivalent(v.args[i], w.args[i]) for i in 0..len(v.args)-1

	// The algorithm searches for a partition of f's values into
	// equivalence classes using the above definition.
	// It starts with a coarse partition and iteratively refines it
	// until it reaches a fixed point.

	// Make initial coarse partitions by using a subset of the conditions above.
	a := f.Cache.allocValueSlice(f.NumValues())
	defer func() { f.Cache.freeValueSlice(a) }() // inside closure to use final value of a
	a = a[:0]
	if f.auxmap == nil {
		f.auxmap = auxmap{}
	}
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if v.Type.IsMemory() {
				continue // memory values can never cse
			}
			// Give each distinct Aux a positive ID the first time it is seen.
			// IDs start at 1, so the map's zero value doubles as "not assigned yet".
			if f.auxmap[v.Aux] == 0 {
				f.auxmap[v.Aux] = int32(len(f.auxmap)) + 1
			}
			a = append(a, v)
		}
	}
	partition := partitionValues(a, f.auxmap)

	// map from value id back to eqclass id
	valueEqClass := f.Cache.allocIDSlice(f.NumValues())
	defer f.Cache.freeIDSlice(valueEqClass)
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			// Use negative equivalence class #s for unique values.
			valueEqClass[v.ID] = -v.ID
		}
	}
	// pNum is the next unused (positive) equivalence class number.
	var pNum ID = 1
	for _, e := range partition {
		if f.pass.debug > 1 && len(e) > 500 {
			fmt.Printf("CSE.large partition (%d): ", len(e))
			// len(e) > 500 here, so the first three entries always exist.
			for j := 0; j < 3; j++ {
				fmt.Printf("%s ", e[j].LongString())
			}
			fmt.Println()
		}

		for _, v := range e {
			valueEqClass[v.ID] = pNum
		}
		if f.pass.debug > 2 && len(e) > 1 {
			fmt.Printf("CSE.partition #%d:", pNum)
			for _, v := range e {
				fmt.Printf(" %s", v.String())
			}
			fmt.Printf("\n")
		}
		pNum++
	}

	// Split equivalence classes at points where they have
	// non-equivalent arguments. Repeat until we can't find any
	// more splits.
	var splitPoints []int
	byArgClass := new(partitionByArgClass) // reusable partitionByArgClass to reduce allocations
	for {
		changed := false

		// partition can grow in the loop. By not using a range loop here,
		// we process new additions as they arrive, avoiding O(n^2) behavior.
		for i := 0; i < len(partition); i++ {
			e := partition[i]

			if opcodeTable[e[0].Op].commutative {
				// Order the first two args before comparison.
				// This swaps the arguments of the values themselves, so that
				// the argument with the smaller class number comes first.
				for _, v := range e {
					if valueEqClass[v.Args[0].ID] > valueEqClass[v.Args[1].ID] {
						v.Args[0], v.Args[1] = v.Args[1], v.Args[0]
					}
				}
			}

			// Sort by eq class of arguments.
			byArgClass.a = e
			byArgClass.eqClass = valueEqClass
			sort.Sort(byArgClass)

			// Find split points.
			// splitPoints collects the start index of every run of values
			// whose arguments all lie in the same classes; index 0 always starts a run.
			splitPoints = append(splitPoints[:0], 0)
			for j := 1; j < len(e); j++ {
				v, w := e[j-1], e[j]
				// Note: the first two args of commutative values were already put
				// in class order by the swap loop above, so comparing arguments
				// position by position is sufficient.
				eqArgs := true
				for k, a := range v.Args {
					b := w.Args[k]
					if valueEqClass[a.ID] != valueEqClass[b.ID] {
						eqArgs = false
						break
					}
				}
				if !eqArgs {
					splitPoints = append(splitPoints, j)
				}
			}
			if len(splitPoints) == 1 {
				continue // no splits, leave equivalence class alone.
			}

			// Move another equivalence class down in place of e.
			partition[i] = partition[len(partition)-1]
			partition = partition[:len(partition)-1]
			// Step back so the class that was just moved into slot i is examined next.
			i--

			// Add new equivalence classes for the parts of e we found.
			splitPoints = append(splitPoints, len(e))
			for j := 0; j < len(splitPoints)-1; j++ {
				// NOTE: this f shadows the *Func parameter for the rest of the loop body.
				f := e[splitPoints[j]:splitPoints[j+1]]
				if len(f) == 1 {
					// Don't add singletons.
					valueEqClass[f[0].ID] = -f[0].ID
					continue
				}
				for _, v := range f {
					valueEqClass[v.ID] = pNum
				}
				pNum++
				partition = append(partition, f)
			}
			changed = true
		}

		if !changed {
			break
		}
	}

	sdom := f.Sdom()

	// Compute substitutions we would like to do. We substitute v for w
	// if v and w are in the same equivalence class and v dominates w.
	// rewrite is indexed by value ID; a non-nil entry is the replacement for that value.
	// NOTE(review): entries are read below without being initialized here, so this
	// presumably relies on allocValueSlice handing back nil-filled storage — confirm.
	rewrite := f.Cache.allocValueSlice(f.NumValues())
	defer f.Cache.freeValueSlice(rewrite)
	byDom := new(partitionByDom) // reusable partitionByDom to reduce allocs
	for _, e := range partition {
		byDom.a = e
		byDom.sdom = sdom
		sort.Sort(byDom)
		for i := 0; i < len(e)-1; i++ {
			// e is sorted by domorder, so a maximal dominant element is first in the slice
			v := e[i]
			if v == nil {
				// Already chosen as a replacement target's victim in an earlier iteration.
				continue
			}

			// Entries are nil-ed out once handled; this destroys the class slice,
			// which is not used again after this loop.
			e[i] = nil
			// Replace all elements of e which v dominates
			for j := i + 1; j < len(e); j++ {
				w := e[j]
				if w == nil {
					continue
				}
				if sdom.IsAncestorEq(v.Block, w.Block) {
					rewrite[w.ID] = v
					e[j] = nil
				} else {
					// e is sorted by domorder, so v.Block doesn't dominate any subsequent blocks in e
					break
				}
			}
		}
	}

	// rewrites counts replaced value arguments only; control-value
	// replacements below do not increment it.
	rewrites := int64(0)

	// Apply substitutions
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			for i, w := range v.Args {
				if x := rewrite[w.ID]; x != nil {
					if w.Pos.IsStmt() == src.PosIsStmt {
						// about to lose a statement marker, w
						// w is an input to v; if they're in the same block
						// and the same line, v is a good-enough new statement boundary.
						if w.Block == v.Block && w.Pos.Line() == v.Pos.Line() {
							v.Pos = v.Pos.WithIsStmt()
							w.Pos = w.Pos.WithNotStmt()
						} // TODO and if this fails?
					}
					v.SetArg(i, x)
					rewrites++
				}
			}
		}
		for i, v := range b.ControlValues() {
			if x := rewrite[v.ID]; x != nil {
				if v.Op == OpNilCheck {
					// nilcheck pass will remove the nil checks and log
					// them appropriately, so don't mess with them here.
					continue
				}
				b.ReplaceControl(i, x)
			}
		}
	}

	if f.pass.stats > 0 {
		f.LogStat("CSE REWRITES", rewrites)
	}
}

// An eqclass approximates an equivalence class. During the
// algorithm it may represent the union of several of the
// final equivalence classes.
239 type eqclass []*Value 240 241 // partitionValues partitions the values into equivalence classes 242 // based on having all the following features match: 243 // - opcode 244 // - type 245 // - auxint 246 // - aux 247 // - nargs 248 // - block # if a phi op 249 // - first two arg's opcodes and auxint 250 // - NOT first two arg's aux; that can break CSE. 251 // 252 // partitionValues returns a list of equivalence classes, each 253 // being a sorted by ID list of *Values. The eqclass slices are 254 // backed by the same storage as the input slice. 255 // Equivalence classes of size 1 are ignored. 256 func partitionValues(a []*Value, auxIDs auxmap) []eqclass { 257 sort.Sort(sortvalues{a, auxIDs}) 258 259 var partition []eqclass 260 for len(a) > 0 { 261 v := a[0] 262 j := 1 263 for ; j < len(a); j++ { 264 w := a[j] 265 if cmpVal(v, w, auxIDs) != types.CMPeq { 266 break 267 } 268 } 269 if j > 1 { 270 partition = append(partition, a[:j]) 271 } 272 a = a[j:] 273 } 274 275 return partition 276 } 277 func lt2Cmp(isLt bool) types.Cmp { 278 if isLt { 279 return types.CMPlt 280 } 281 return types.CMPgt 282 } 283 284 type auxmap map[Aux]int32 285 286 func cmpVal(v, w *Value, auxIDs auxmap) types.Cmp { 287 // Try to order these comparison by cost (cheaper first) 288 if v.Op != w.Op { 289 return lt2Cmp(v.Op < w.Op) 290 } 291 if v.AuxInt != w.AuxInt { 292 return lt2Cmp(v.AuxInt < w.AuxInt) 293 } 294 if len(v.Args) != len(w.Args) { 295 return lt2Cmp(len(v.Args) < len(w.Args)) 296 } 297 if v.Op == OpPhi && v.Block != w.Block { 298 return lt2Cmp(v.Block.ID < w.Block.ID) 299 } 300 if v.Type.IsMemory() { 301 // We will never be able to CSE two values 302 // that generate memory. 303 return lt2Cmp(v.ID < w.ID) 304 } 305 // OpSelect is a pseudo-op. We need to be more aggressive 306 // regarding CSE to keep multiple OpSelect's of the same 307 // argument from existing. 
308 if v.Op != OpSelect0 && v.Op != OpSelect1 && v.Op != OpSelectN { 309 if tc := v.Type.Compare(w.Type); tc != types.CMPeq { 310 return tc 311 } 312 } 313 314 if v.Aux != w.Aux { 315 if v.Aux == nil { 316 return types.CMPlt 317 } 318 if w.Aux == nil { 319 return types.CMPgt 320 } 321 return lt2Cmp(auxIDs[v.Aux] < auxIDs[w.Aux]) 322 } 323 324 return types.CMPeq 325 } 326 327 // Sort values to make the initial partition. 328 type sortvalues struct { 329 a []*Value // array of values 330 auxIDs auxmap // aux -> aux ID map 331 } 332 333 func (sv sortvalues) Len() int { return len(sv.a) } 334 func (sv sortvalues) Swap(i, j int) { sv.a[i], sv.a[j] = sv.a[j], sv.a[i] } 335 func (sv sortvalues) Less(i, j int) bool { 336 v := sv.a[i] 337 w := sv.a[j] 338 if cmp := cmpVal(v, w, sv.auxIDs); cmp != types.CMPeq { 339 return cmp == types.CMPlt 340 } 341 342 // Sort by value ID last to keep the sort result deterministic. 343 return v.ID < w.ID 344 } 345 346 type partitionByDom struct { 347 a []*Value // array of values 348 sdom SparseTree 349 } 350 351 func (sv partitionByDom) Len() int { return len(sv.a) } 352 func (sv partitionByDom) Swap(i, j int) { sv.a[i], sv.a[j] = sv.a[j], sv.a[i] } 353 func (sv partitionByDom) Less(i, j int) bool { 354 v := sv.a[i] 355 w := sv.a[j] 356 return sv.sdom.domorder(v.Block) < sv.sdom.domorder(w.Block) 357 } 358 359 type partitionByArgClass struct { 360 a []*Value // array of values 361 eqClass []ID // equivalence class IDs of values 362 } 363 364 func (sv partitionByArgClass) Len() int { return len(sv.a) } 365 func (sv partitionByArgClass) Swap(i, j int) { sv.a[i], sv.a[j] = sv.a[j], sv.a[i] } 366 func (sv partitionByArgClass) Less(i, j int) bool { 367 v := sv.a[i] 368 w := sv.a[j] 369 for i, a := range v.Args { 370 b := w.Args[i] 371 if sv.eqClass[a.ID] < sv.eqClass[b.ID] { 372 return true 373 } 374 if sv.eqClass[a.ID] > sv.eqClass[b.ID] { 375 return false 376 } 377 } 378 return false 379 }