github.com/cockroachdb/tools@v0.0.0-20230222021103-a6d27438930d/go/pointer/analysis.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package pointer 6 7 // This file defines the main datatypes and Analyze function of the pointer analysis. 8 9 import ( 10 "fmt" 11 "go/token" 12 "go/types" 13 "io" 14 "os" 15 "reflect" 16 "runtime" 17 "runtime/debug" 18 "sort" 19 "strings" 20 21 "golang.org/x/tools/go/callgraph" 22 "golang.org/x/tools/go/ssa" 23 "golang.org/x/tools/go/types/typeutil" 24 ) 25 26 const ( 27 // optimization options; enable all when committing 28 optRenumber = true // enable renumbering optimization (makes logs hard to read) 29 optHVN = true // enable pointer equivalence via Hash-Value Numbering 30 31 // debugging options; disable all when committing 32 debugHVN = false // enable assertions in HVN 33 debugHVNVerbose = false // enable extra HVN logging 34 debugHVNCrossCheck = false // run solver with/without HVN and compare (caveats below) 35 debugTimers = false // show running time of each phase 36 ) 37 38 // object.flags bitmask values. 39 const ( 40 otTagged = 1 << iota // type-tagged object 41 otIndirect // type-tagged object with indirect payload 42 otFunction // function object 43 ) 44 45 // An object represents a contiguous block of memory to which some 46 // (generalized) pointer may point. 47 // 48 // (Note: most variables called 'obj' are not *objects but nodeids 49 // such that a.nodes[obj].obj != nil.) 50 type object struct { 51 // flags is a bitset of the node type (ot*) flags defined above. 52 flags uint32 53 54 // Number of following nodes belonging to the same "object" 55 // allocation. Zero for all other nodes. 56 size uint32 57 58 // data describes this object; it has one of these types: 59 // 60 // ssa.Value for an object allocated by an SSA operation. 61 // types.Type for an rtype instance object or *rtype-tagged object. 62 // string for an intrinsic object, e.g. the array behind os.Args. 63 // nil for an object allocated by an intrinsic. 64 // (cgn provides the identity of the intrinsic.) 65 data interface{} 66 67 // The call-graph node (=context) in which this object was allocated. 68 // May be nil for global objects: Global, Const, some Functions. 69 cgn *cgnode 70 } 71 72 // nodeid denotes a node. 73 // It is an index within analysis.nodes. 74 // We use small integers, not *node pointers, for many reasons: 75 // - they are smaller on 64-bit systems. 76 // - sets of them can be represented compactly in bitvectors or BDDs. 77 // - order matters; a field offset can be computed by simple addition. 78 type nodeid uint32 79 80 // A node is an equivalence class of memory locations. 81 // Nodes may be pointers, pointed-to locations, neither, or both. 82 // 83 // Nodes that are pointed-to locations ("labels") have an enclosing 84 // object (see analysis.enclosingObject). 85 type node struct { 86 // If non-nil, this node is the start of an object 87 // (addressable memory location). 88 // The following obj.size nodes implicitly belong to the object; 89 // they locate their object by scanning back. 90 obj *object 91 92 // The type of the field denoted by this node. Non-aggregate, 93 // unless this is an tagged.T node (i.e. the thing 94 // pointed to by an interface) in which case typ is that type. 95 typ types.Type 96 97 // subelement indicates which directly embedded subelement of 98 // an object of aggregate type (struct, tuple, array) this is. 99 subelement *fieldInfo // e.g. ".a.b[*].c" 100 101 // Solver state for the canonical node of this pointer- 102 // equivalence class. Each node is created with its own state 103 // but they become shared after HVN. 104 solve *solverState 105 } 106 107 // An analysis instance holds the state of a single pointer analysis problem. 108 type analysis struct { 109 config *Config // the client's control/observer interface 110 prog *ssa.Program // the program being analyzed 111 log io.Writer // log stream; nil to disable 112 panicNode nodeid // sink for panic, source for recover 113 nodes []*node // indexed by nodeid 114 flattenMemo map[types.Type][]*fieldInfo // memoization of flatten() 115 trackTypes map[types.Type]bool // memoization of shouldTrack() 116 constraints []constraint // set of constraints 117 cgnodes []*cgnode // all cgnodes 118 genq []*cgnode // queue of functions to generate constraints for 119 intrinsics map[*ssa.Function]intrinsic // non-nil values are summaries for intrinsic fns 120 globalval map[ssa.Value]nodeid // node for each global ssa.Value 121 globalobj map[ssa.Value]nodeid // maps v to sole member of pts(v), if singleton 122 localval map[ssa.Value]nodeid // node for each local ssa.Value 123 localobj map[ssa.Value]nodeid // maps v to sole member of pts(v), if singleton 124 atFuncs map[*ssa.Function]bool // address-taken functions (for presolver) 125 mapValues []nodeid // values of makemap objects (indirect in HVN) 126 work nodeset // solver's worklist 127 result *Result // results of the analysis 128 track track // pointerlike types whose aliasing we track 129 deltaSpace []int // working space for iterating over PTS deltas 130 131 // Reflection & intrinsics: 132 hasher typeutil.Hasher // cache of type hashes 133 reflectValueObj types.Object // type symbol for reflect.Value (if present) 134 reflectValueCall *ssa.Function // (reflect.Value).Call 135 reflectRtypeObj types.Object // *types.TypeName for reflect.rtype (if present) 136 reflectRtypePtr *types.Pointer // *reflect.rtype 137 reflectType *types.Named // reflect.Type 138 rtypes typeutil.Map // nodeid of canonical *rtype-tagged object for type T 139 reflectZeros typeutil.Map // nodeid of canonical T-tagged object for zero value 140 runtimeSetFinalizer *ssa.Function // runtime.SetFinalizer 141 } 142 143 // enclosingObj returns the first node of the addressable memory 144 // object that encloses node id. Panic ensues if that node does not 145 // belong to any object. 146 func (a *analysis) enclosingObj(id nodeid) nodeid { 147 // Find previous node with obj != nil. 148 for i := id; i >= 0; i-- { 149 n := a.nodes[i] 150 if obj := n.obj; obj != nil { 151 if i+nodeid(obj.size) <= id { 152 break // out of bounds 153 } 154 return i 155 } 156 } 157 panic("node has no enclosing object") 158 } 159 160 // labelFor returns the Label for node id. 161 // Panic ensues if that node is not addressable. 162 func (a *analysis) labelFor(id nodeid) *Label { 163 return &Label{ 164 obj: a.nodes[a.enclosingObj(id)].obj, 165 subelement: a.nodes[id].subelement, 166 } 167 } 168 169 func (a *analysis) warnf(pos token.Pos, format string, args ...interface{}) { 170 msg := fmt.Sprintf(format, args...) 171 if a.log != nil { 172 fmt.Fprintf(a.log, "%s: warning: %s\n", a.prog.Fset.Position(pos), msg) 173 } 174 a.result.Warnings = append(a.result.Warnings, Warning{pos, msg}) 175 } 176 177 // computeTrackBits sets a.track to the necessary 'track' bits for the pointer queries. 178 func (a *analysis) computeTrackBits() { 179 if len(a.config.extendedQueries) != 0 { 180 // TODO(dh): only track the types necessary for the query. 181 a.track = trackAll 182 return 183 } 184 var queryTypes []types.Type 185 for v := range a.config.Queries { 186 queryTypes = append(queryTypes, v.Type()) 187 } 188 for v := range a.config.IndirectQueries { 189 queryTypes = append(queryTypes, mustDeref(v.Type())) 190 } 191 for _, t := range queryTypes { 192 switch t.Underlying().(type) { 193 case *types.Chan: 194 a.track |= trackChan 195 case *types.Map: 196 a.track |= trackMap 197 case *types.Pointer: 198 a.track |= trackPtr 199 case *types.Slice: 200 a.track |= trackSlice 201 case *types.Interface: 202 a.track = trackAll 203 return 204 } 205 if rVObj := a.reflectValueObj; rVObj != nil && types.Identical(t, rVObj.Type()) { 206 a.track = trackAll 207 return 208 } 209 } 210 } 211 212 // Analyze runs the pointer analysis with the scope and options 213 // specified by config, and returns the (synthetic) root of the callgraph. 214 // 215 // Pointer analysis of a transitively closed well-typed program should 216 // always succeed. An error can occur only due to an internal bug. 217 func Analyze(config *Config) (result *Result, err error) { 218 if config.Mains == nil { 219 return nil, fmt.Errorf("no main/test packages to analyze (check $GOROOT/$GOPATH)") 220 } 221 defer func() { 222 if p := recover(); p != nil { 223 err = fmt.Errorf("internal error in pointer analysis: %v (please report this bug)", p) 224 fmt.Fprintln(os.Stderr, "Internal panic in pointer analysis:") 225 debug.PrintStack() 226 } 227 }() 228 229 a := &analysis{ 230 config: config, 231 log: config.Log, 232 prog: config.prog(), 233 globalval: make(map[ssa.Value]nodeid), 234 globalobj: make(map[ssa.Value]nodeid), 235 flattenMemo: make(map[types.Type][]*fieldInfo), 236 trackTypes: make(map[types.Type]bool), 237 atFuncs: make(map[*ssa.Function]bool), 238 hasher: typeutil.MakeHasher(), 239 intrinsics: make(map[*ssa.Function]intrinsic), 240 result: &Result{ 241 Queries: make(map[ssa.Value]Pointer), 242 IndirectQueries: make(map[ssa.Value]Pointer), 243 }, 244 deltaSpace: make([]int, 0, 100), 245 } 246 247 if false { 248 a.log = os.Stderr // for debugging crashes; extremely verbose 249 } 250 251 if a.log != nil { 252 fmt.Fprintln(a.log, "==== Starting analysis") 253 } 254 255 // Pointer analysis requires a complete program for soundness. 256 // Check to prevent accidental misconfiguration. 257 for _, pkg := range a.prog.AllPackages() { 258 // (This only checks that the package scope is complete, 259 // not that func bodies exist, but it's a good signal.) 260 if !pkg.Pkg.Complete() { 261 return nil, fmt.Errorf(`pointer analysis requires a complete program yet package %q was incomplete`, pkg.Pkg.Path()) 262 } 263 } 264 265 if reflect := a.prog.ImportedPackage("reflect"); reflect != nil { 266 rV := reflect.Pkg.Scope().Lookup("Value") 267 a.reflectValueObj = rV 268 a.reflectValueCall = a.prog.LookupMethod(rV.Type(), nil, "Call") 269 a.reflectType = reflect.Pkg.Scope().Lookup("Type").Type().(*types.Named) 270 a.reflectRtypeObj = reflect.Pkg.Scope().Lookup("rtype") 271 a.reflectRtypePtr = types.NewPointer(a.reflectRtypeObj.Type()) 272 273 // Override flattening of reflect.Value, treating it like a basic type. 274 tReflectValue := a.reflectValueObj.Type() 275 a.flattenMemo[tReflectValue] = []*fieldInfo{{typ: tReflectValue}} 276 277 // Override shouldTrack of reflect.Value and *reflect.rtype. 278 // Always track pointers of these types. 279 a.trackTypes[tReflectValue] = true 280 a.trackTypes[a.reflectRtypePtr] = true 281 282 a.rtypes.SetHasher(a.hasher) 283 a.reflectZeros.SetHasher(a.hasher) 284 } 285 if runtime := a.prog.ImportedPackage("runtime"); runtime != nil { 286 a.runtimeSetFinalizer = runtime.Func("SetFinalizer") 287 } 288 a.computeTrackBits() 289 290 a.generate() 291 a.showCounts() 292 293 if optRenumber { 294 a.renumber() 295 } 296 297 N := len(a.nodes) // excludes solver-created nodes 298 299 if optHVN { 300 if debugHVNCrossCheck { 301 // Cross-check: run the solver once without 302 // optimization, once with, and compare the 303 // solutions. 304 savedConstraints := a.constraints 305 306 a.solve() 307 a.dumpSolution("A.pts", N) 308 309 // Restore. 310 a.constraints = savedConstraints 311 for _, n := range a.nodes { 312 n.solve = new(solverState) 313 } 314 a.nodes = a.nodes[:N] 315 316 // rtypes is effectively part of the solver state. 317 a.rtypes = typeutil.Map{} 318 a.rtypes.SetHasher(a.hasher) 319 } 320 321 a.hvn() 322 } 323 324 if debugHVNCrossCheck { 325 runtime.GC() 326 runtime.GC() 327 } 328 329 a.solve() 330 331 // Compare solutions. 332 if optHVN && debugHVNCrossCheck { 333 a.dumpSolution("B.pts", N) 334 335 if !diff("A.pts", "B.pts") { 336 return nil, fmt.Errorf("internal error: optimization changed solution") 337 } 338 } 339 340 // Create callgraph.Nodes in deterministic order. 341 if cg := a.result.CallGraph; cg != nil { 342 for _, caller := range a.cgnodes { 343 cg.CreateNode(caller.fn) 344 } 345 } 346 347 // Add dynamic edges to call graph. 348 var space [100]int 349 for _, caller := range a.cgnodes { 350 for _, site := range caller.sites { 351 for _, callee := range a.nodes[site.targets].solve.pts.AppendTo(space[:0]) { 352 a.callEdge(caller, site, nodeid(callee)) 353 } 354 } 355 } 356 357 return a.result, nil 358 } 359 360 // callEdge is called for each edge in the callgraph. 361 // calleeid is the callee's object node (has otFunction flag). 362 func (a *analysis) callEdge(caller *cgnode, site *callsite, calleeid nodeid) { 363 obj := a.nodes[calleeid].obj 364 if obj.flags&otFunction == 0 { 365 panic(fmt.Sprintf("callEdge %s -> n%d: not a function object", site, calleeid)) 366 } 367 callee := obj.cgn 368 369 if cg := a.result.CallGraph; cg != nil { 370 // TODO(adonovan): opt: I would expect duplicate edges 371 // (to wrappers) to arise due to the elimination of 372 // context information, but I haven't observed any. 373 // Understand this better. 374 callgraph.AddEdge(cg.CreateNode(caller.fn), site.instr, cg.CreateNode(callee.fn)) 375 } 376 377 if a.log != nil { 378 fmt.Fprintf(a.log, "\tcall edge %s -> %s\n", site, callee) 379 } 380 381 // Warn about calls to functions that are handled unsoundly. 382 // TODO(adonovan): de-dup these messages. 383 fn := callee.fn 384 385 // Warn about calls to non-intrinsic external functions. 386 if fn.Blocks == nil && a.findIntrinsic(fn) == nil { 387 a.warnf(site.pos(), "unsound call to unknown intrinsic: %s", fn) 388 a.warnf(fn.Pos(), " (declared here)") 389 } 390 391 // Warn about calls to generic function bodies. 392 if fn.TypeParams().Len() > 0 && len(fn.TypeArgs()) == 0 { 393 a.warnf(site.pos(), "unsound call to generic function body: %s (build with ssa.InstantiateGenerics)", fn) 394 a.warnf(fn.Pos(), " (declared here)") 395 } 396 397 // Warn about calls to instantiation wrappers of generics functions. 398 if fn.Origin() != nil && strings.HasPrefix(fn.Synthetic, "instantiation wrapper ") { 399 a.warnf(site.pos(), "unsound call to instantiation wrapper of generic: %s (build with ssa.InstantiateGenerics)", fn) 400 a.warnf(fn.Pos(), " (declared here)") 401 } 402 } 403 404 // dumpSolution writes the PTS solution to the specified file. 405 // 406 // It only dumps the nodes that existed before solving. The order in 407 // which solver-created nodes are created depends on pre-solver 408 // optimization, so we can't include them in the cross-check. 409 func (a *analysis) dumpSolution(filename string, N int) { 410 f, err := os.Create(filename) 411 if err != nil { 412 panic(err) 413 } 414 for id, n := range a.nodes[:N] { 415 if _, err := fmt.Fprintf(f, "pts(n%d) = {", id); err != nil { 416 panic(err) 417 } 418 var sep string 419 for _, l := range n.solve.pts.AppendTo(a.deltaSpace) { 420 if l >= N { 421 break 422 } 423 fmt.Fprintf(f, "%s%d", sep, l) 424 sep = " " 425 } 426 fmt.Fprintf(f, "} : %s\n", n.typ) 427 } 428 if err := f.Close(); err != nil { 429 panic(err) 430 } 431 } 432 433 // showCounts logs the size of the constraint system. A typical 434 // optimized distribution is 65% copy, 13% load, 11% addr, 5% 435 // offsetAddr, 4% store, 2% others. 436 func (a *analysis) showCounts() { 437 if a.log != nil { 438 counts := make(map[reflect.Type]int) 439 for _, c := range a.constraints { 440 counts[reflect.TypeOf(c)]++ 441 } 442 fmt.Fprintf(a.log, "# constraints:\t%d\n", len(a.constraints)) 443 var lines []string 444 for t, n := range counts { 445 line := fmt.Sprintf("%7d (%2d%%)\t%s", n, 100*n/len(a.constraints), t) 446 lines = append(lines, line) 447 } 448 sort.Sort(sort.Reverse(sort.StringSlice(lines))) 449 for _, line := range lines { 450 fmt.Fprintf(a.log, "\t%s\n", line) 451 } 452 453 fmt.Fprintf(a.log, "# nodes:\t%d\n", len(a.nodes)) 454 455 // Show number of pointer equivalence classes. 456 m := make(map[*solverState]bool) 457 for _, n := range a.nodes { 458 m[n.solve] = true 459 } 460 fmt.Fprintf(a.log, "# ptsets:\t%d\n", len(m)) 461 } 462 }