github.com/cockroachdb/tools@v0.0.0-20230222021103-a6d27438930d/go/pointer/analysis.go

github.com/cockroachdb/tools@v0.0.0-20230222021103-a6d27438930d/go/pointer/analysis.go (about)

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package pointer
     6  
     7  // This file defines the main datatypes and Analyze function of the pointer analysis.
     8  
     9  import (
    10  	"fmt"
    11  	"go/token"
    12  	"go/types"
    13  	"io"
    14  	"os"
    15  	"reflect"
    16  	"runtime"
    17  	"runtime/debug"
    18  	"sort"
    19  	"strings"
    20  
    21  	"golang.org/x/tools/go/callgraph"
    22  	"golang.org/x/tools/go/ssa"
    23  	"golang.org/x/tools/go/types/typeutil"
    24  )
    25  
    26  const (
    27  	// optimization options; enable all when committing
    28  	optRenumber = true // enable renumbering optimization (makes logs hard to read)
    29  	optHVN      = true // enable pointer equivalence via Hash-Value Numbering
    30  
    31  	// debugging options; disable all when committing
    32  	debugHVN           = false // enable assertions in HVN
    33  	debugHVNVerbose    = false // enable extra HVN logging
    34  	debugHVNCrossCheck = false // run solver with/without HVN and compare (caveats below)
    35  	debugTimers        = false // show running time of each phase
    36  )
    37  
    38  // object.flags bitmask values.
    39  const (
    40  	otTagged   = 1 << iota // type-tagged object
    41  	otIndirect             // type-tagged object with indirect payload
    42  	otFunction             // function object
    43  )
    44  
    45  // An object represents a contiguous block of memory to which some
    46  // (generalized) pointer may point.
    47  //
    48  // (Note: most variables called 'obj' are not *objects but nodeids
    49  // such that a.nodes[obj].obj != nil.)
    50  type object struct {
    51  	// flags is a bitset of the node type (ot*) flags defined above.
    52  	flags uint32
    53  
    54  	// Number of following nodes belonging to the same "object"
    55  	// allocation.  Zero for all other nodes.
    56  	size uint32
    57  
    58  	// data describes this object; it has one of these types:
    59  	//
    60  	// ssa.Value	for an object allocated by an SSA operation.
    61  	// types.Type	for an rtype instance object or *rtype-tagged object.
    62  	// string	for an intrinsic object, e.g. the array behind os.Args.
    63  	// nil		for an object allocated by an intrinsic.
    64  	//		(cgn provides the identity of the intrinsic.)
    65  	data interface{}
    66  
    67  	// The call-graph node (=context) in which this object was allocated.
    68  	// May be nil for global objects: Global, Const, some Functions.
    69  	cgn *cgnode
    70  }
    71  
    72  // nodeid denotes a node.
    73  // It is an index within analysis.nodes.
    74  // We use small integers, not *node pointers, for many reasons:
    75  // - they are smaller on 64-bit systems.
    76  // - sets of them can be represented compactly in bitvectors or BDDs.
    77  // - order matters; a field offset can be computed by simple addition.
    78  type nodeid uint32
    79  
    80  // A node is an equivalence class of memory locations.
    81  // Nodes may be pointers, pointed-to locations, neither, or both.
    82  //
    83  // Nodes that are pointed-to locations ("labels") have an enclosing
    84  // object (see analysis.enclosingObject).
    85  type node struct {
    86  	// If non-nil, this node is the start of an object
    87  	// (addressable memory location).
    88  	// The following obj.size nodes implicitly belong to the object;
    89  	// they locate their object by scanning back.
    90  	obj *object
    91  
    92  	// The type of the field denoted by this node.  Non-aggregate,
    93  	// unless this is an tagged.T node (i.e. the thing
    94  	// pointed to by an interface) in which case typ is that type.
    95  	typ types.Type
    96  
    97  	// subelement indicates which directly embedded subelement of
    98  	// an object of aggregate type (struct, tuple, array) this is.
    99  	subelement *fieldInfo // e.g. ".a.b[*].c"
   100  
   101  	// Solver state for the canonical node of this pointer-
   102  	// equivalence class.  Each node is created with its own state
   103  	// but they become shared after HVN.
   104  	solve *solverState
   105  }
   106  
   107  // An analysis instance holds the state of a single pointer analysis problem.
   108  type analysis struct {
   109  	config      *Config                     // the client's control/observer interface
   110  	prog        *ssa.Program                // the program being analyzed
   111  	log         io.Writer                   // log stream; nil to disable
   112  	panicNode   nodeid                      // sink for panic, source for recover
   113  	nodes       []*node                     // indexed by nodeid
   114  	flattenMemo map[types.Type][]*fieldInfo // memoization of flatten()
   115  	trackTypes  map[types.Type]bool         // memoization of shouldTrack()
   116  	constraints []constraint                // set of constraints
   117  	cgnodes     []*cgnode                   // all cgnodes
   118  	genq        []*cgnode                   // queue of functions to generate constraints for
   119  	intrinsics  map[*ssa.Function]intrinsic // non-nil values are summaries for intrinsic fns
   120  	globalval   map[ssa.Value]nodeid        // node for each global ssa.Value
   121  	globalobj   map[ssa.Value]nodeid        // maps v to sole member of pts(v), if singleton
   122  	localval    map[ssa.Value]nodeid        // node for each local ssa.Value
   123  	localobj    map[ssa.Value]nodeid        // maps v to sole member of pts(v), if singleton
   124  	atFuncs     map[*ssa.Function]bool      // address-taken functions (for presolver)
   125  	mapValues   []nodeid                    // values of makemap objects (indirect in HVN)
   126  	work        nodeset                     // solver's worklist
   127  	result      *Result                     // results of the analysis
   128  	track       track                       // pointerlike types whose aliasing we track
   129  	deltaSpace  []int                       // working space for iterating over PTS deltas
   130  
   131  	// Reflection & intrinsics:
   132  	hasher              typeutil.Hasher // cache of type hashes
   133  	reflectValueObj     types.Object    // type symbol for reflect.Value (if present)
   134  	reflectValueCall    *ssa.Function   // (reflect.Value).Call
   135  	reflectRtypeObj     types.Object    // *types.TypeName for reflect.rtype (if present)
   136  	reflectRtypePtr     *types.Pointer  // *reflect.rtype
   137  	reflectType         *types.Named    // reflect.Type
   138  	rtypes              typeutil.Map    // nodeid of canonical *rtype-tagged object for type T
   139  	reflectZeros        typeutil.Map    // nodeid of canonical T-tagged object for zero value
   140  	runtimeSetFinalizer *ssa.Function   // runtime.SetFinalizer
   141  }
   142  
   143  // enclosingObj returns the first node of the addressable memory
   144  // object that encloses node id.  Panic ensues if that node does not
   145  // belong to any object.
   146  func (a *analysis) enclosingObj(id nodeid) nodeid {
   147  	// Find previous node with obj != nil.
   148  	for i := id; i >= 0; i-- {
   149  		n := a.nodes[i]
   150  		if obj := n.obj; obj != nil {
   151  			if i+nodeid(obj.size) <= id {
   152  				break // out of bounds
   153  			}
   154  			return i
   155  		}
   156  	}
   157  	panic("node has no enclosing object")
   158  }
   159  
   160  // labelFor returns the Label for node id.
   161  // Panic ensues if that node is not addressable.
   162  func (a *analysis) labelFor(id nodeid) *Label {
   163  	return &Label{
   164  		obj:        a.nodes[a.enclosingObj(id)].obj,
   165  		subelement: a.nodes[id].subelement,
   166  	}
   167  }
   168  
   169  func (a *analysis) warnf(pos token.Pos, format string, args ...interface{}) {
   170  	msg := fmt.Sprintf(format, args...)
   171  	if a.log != nil {
   172  		fmt.Fprintf(a.log, "%s: warning: %s\n", a.prog.Fset.Position(pos), msg)
   173  	}
   174  	a.result.Warnings = append(a.result.Warnings, Warning{pos, msg})
   175  }
   176  
   177  // computeTrackBits sets a.track to the necessary 'track' bits for the pointer queries.
   178  func (a *analysis) computeTrackBits() {
   179  	if len(a.config.extendedQueries) != 0 {
   180  		// TODO(dh): only track the types necessary for the query.
   181  		a.track = trackAll
   182  		return
   183  	}
   184  	var queryTypes []types.Type
   185  	for v := range a.config.Queries {
   186  		queryTypes = append(queryTypes, v.Type())
   187  	}
   188  	for v := range a.config.IndirectQueries {
   189  		queryTypes = append(queryTypes, mustDeref(v.Type()))
   190  	}
   191  	for _, t := range queryTypes {
   192  		switch t.Underlying().(type) {
   193  		case *types.Chan:
   194  			a.track |= trackChan
   195  		case *types.Map:
   196  			a.track |= trackMap
   197  		case *types.Pointer:
   198  			a.track |= trackPtr
   199  		case *types.Slice:
   200  			a.track |= trackSlice
   201  		case *types.Interface:
   202  			a.track = trackAll
   203  			return
   204  		}
   205  		if rVObj := a.reflectValueObj; rVObj != nil && types.Identical(t, rVObj.Type()) {
   206  			a.track = trackAll
   207  			return
   208  		}
   209  	}
   210  }
   211  
   212  // Analyze runs the pointer analysis with the scope and options
   213  // specified by config, and returns the (synthetic) root of the callgraph.
   214  //
   215  // Pointer analysis of a transitively closed well-typed program should
   216  // always succeed.  An error can occur only due to an internal bug.
   217  func Analyze(config *Config) (result *Result, err error) {
   218  	if config.Mains == nil {
   219  		return nil, fmt.Errorf("no main/test packages to analyze (check $GOROOT/$GOPATH)")
   220  	}
   221  	defer func() {
   222  		if p := recover(); p != nil {
   223  			err = fmt.Errorf("internal error in pointer analysis: %v (please report this bug)", p)
   224  			fmt.Fprintln(os.Stderr, "Internal panic in pointer analysis:")
   225  			debug.PrintStack()
   226  		}
   227  	}()
   228  
   229  	a := &analysis{
   230  		config:      config,
   231  		log:         config.Log,
   232  		prog:        config.prog(),
   233  		globalval:   make(map[ssa.Value]nodeid),
   234  		globalobj:   make(map[ssa.Value]nodeid),
   235  		flattenMemo: make(map[types.Type][]*fieldInfo),
   236  		trackTypes:  make(map[types.Type]bool),
   237  		atFuncs:     make(map[*ssa.Function]bool),
   238  		hasher:      typeutil.MakeHasher(),
   239  		intrinsics:  make(map[*ssa.Function]intrinsic),
   240  		result: &Result{
   241  			Queries:         make(map[ssa.Value]Pointer),
   242  			IndirectQueries: make(map[ssa.Value]Pointer),
   243  		},
   244  		deltaSpace: make([]int, 0, 100),
   245  	}
   246  
   247  	if false {
   248  		a.log = os.Stderr // for debugging crashes; extremely verbose
   249  	}
   250  
   251  	if a.log != nil {
   252  		fmt.Fprintln(a.log, "==== Starting analysis")
   253  	}
   254  
   255  	// Pointer analysis requires a complete program for soundness.
   256  	// Check to prevent accidental misconfiguration.
   257  	for _, pkg := range a.prog.AllPackages() {
   258  		// (This only checks that the package scope is complete,
   259  		// not that func bodies exist, but it's a good signal.)
   260  		if !pkg.Pkg.Complete() {
   261  			return nil, fmt.Errorf(`pointer analysis requires a complete program yet package %q was incomplete`, pkg.Pkg.Path())
   262  		}
   263  	}
   264  
   265  	if reflect := a.prog.ImportedPackage("reflect"); reflect != nil {
   266  		rV := reflect.Pkg.Scope().Lookup("Value")
   267  		a.reflectValueObj = rV
   268  		a.reflectValueCall = a.prog.LookupMethod(rV.Type(), nil, "Call")
   269  		a.reflectType = reflect.Pkg.Scope().Lookup("Type").Type().(*types.Named)
   270  		a.reflectRtypeObj = reflect.Pkg.Scope().Lookup("rtype")
   271  		a.reflectRtypePtr = types.NewPointer(a.reflectRtypeObj.Type())
   272  
   273  		// Override flattening of reflect.Value, treating it like a basic type.
   274  		tReflectValue := a.reflectValueObj.Type()
   275  		a.flattenMemo[tReflectValue] = []*fieldInfo{{typ: tReflectValue}}
   276  
   277  		// Override shouldTrack of reflect.Value and *reflect.rtype.
   278  		// Always track pointers of these types.
   279  		a.trackTypes[tReflectValue] = true
   280  		a.trackTypes[a.reflectRtypePtr] = true
   281  
   282  		a.rtypes.SetHasher(a.hasher)
   283  		a.reflectZeros.SetHasher(a.hasher)
   284  	}
   285  	if runtime := a.prog.ImportedPackage("runtime"); runtime != nil {
   286  		a.runtimeSetFinalizer = runtime.Func("SetFinalizer")
   287  	}
   288  	a.computeTrackBits()
   289  
   290  	a.generate()
   291  	a.showCounts()
   292  
   293  	if optRenumber {
   294  		a.renumber()
   295  	}
   296  
   297  	N := len(a.nodes) // excludes solver-created nodes
   298  
   299  	if optHVN {
   300  		if debugHVNCrossCheck {
   301  			// Cross-check: run the solver once without
   302  			// optimization, once with, and compare the
   303  			// solutions.
   304  			savedConstraints := a.constraints
   305  
   306  			a.solve()
   307  			a.dumpSolution("A.pts", N)
   308  
   309  			// Restore.
   310  			a.constraints = savedConstraints
   311  			for _, n := range a.nodes {
   312  				n.solve = new(solverState)
   313  			}
   314  			a.nodes = a.nodes[:N]
   315  
   316  			// rtypes is effectively part of the solver state.
   317  			a.rtypes = typeutil.Map{}
   318  			a.rtypes.SetHasher(a.hasher)
   319  		}
   320  
   321  		a.hvn()
   322  	}
   323  
   324  	if debugHVNCrossCheck {
   325  		runtime.GC()
   326  		runtime.GC()
   327  	}
   328  
   329  	a.solve()
   330  
   331  	// Compare solutions.
   332  	if optHVN && debugHVNCrossCheck {
   333  		a.dumpSolution("B.pts", N)
   334  
   335  		if !diff("A.pts", "B.pts") {
   336  			return nil, fmt.Errorf("internal error: optimization changed solution")
   337  		}
   338  	}
   339  
   340  	// Create callgraph.Nodes in deterministic order.
   341  	if cg := a.result.CallGraph; cg != nil {
   342  		for _, caller := range a.cgnodes {
   343  			cg.CreateNode(caller.fn)
   344  		}
   345  	}
   346  
   347  	// Add dynamic edges to call graph.
   348  	var space [100]int
   349  	for _, caller := range a.cgnodes {
   350  		for _, site := range caller.sites {
   351  			for _, callee := range a.nodes[site.targets].solve.pts.AppendTo(space[:0]) {
   352  				a.callEdge(caller, site, nodeid(callee))
   353  			}
   354  		}
   355  	}
   356  
   357  	return a.result, nil
   358  }
   359  
   360  // callEdge is called for each edge in the callgraph.
   361  // calleeid is the callee's object node (has otFunction flag).
   362  func (a *analysis) callEdge(caller *cgnode, site *callsite, calleeid nodeid) {
   363  	obj := a.nodes[calleeid].obj
   364  	if obj.flags&otFunction == 0 {
   365  		panic(fmt.Sprintf("callEdge %s -> n%d: not a function object", site, calleeid))
   366  	}
   367  	callee := obj.cgn
   368  
   369  	if cg := a.result.CallGraph; cg != nil {
   370  		// TODO(adonovan): opt: I would expect duplicate edges
   371  		// (to wrappers) to arise due to the elimination of
   372  		// context information, but I haven't observed any.
   373  		// Understand this better.
   374  		callgraph.AddEdge(cg.CreateNode(caller.fn), site.instr, cg.CreateNode(callee.fn))
   375  	}
   376  
   377  	if a.log != nil {
   378  		fmt.Fprintf(a.log, "\tcall edge %s -> %s\n", site, callee)
   379  	}
   380  
   381  	// Warn about calls to functions that are handled unsoundly.
   382  	// TODO(adonovan): de-dup these messages.
   383  	fn := callee.fn
   384  
   385  	// Warn about calls to non-intrinsic external functions.
   386  	if fn.Blocks == nil && a.findIntrinsic(fn) == nil {
   387  		a.warnf(site.pos(), "unsound call to unknown intrinsic: %s", fn)
   388  		a.warnf(fn.Pos(), " (declared here)")
   389  	}
   390  
   391  	// Warn about calls to generic function bodies.
   392  	if fn.TypeParams().Len() > 0 && len(fn.TypeArgs()) == 0 {
   393  		a.warnf(site.pos(), "unsound call to generic function body: %s (build with ssa.InstantiateGenerics)", fn)
   394  		a.warnf(fn.Pos(), " (declared here)")
   395  	}
   396  
   397  	// Warn about calls to instantiation wrappers of generics functions.
   398  	if fn.Origin() != nil && strings.HasPrefix(fn.Synthetic, "instantiation wrapper ") {
   399  		a.warnf(site.pos(), "unsound call to instantiation wrapper of generic: %s (build with ssa.InstantiateGenerics)", fn)
   400  		a.warnf(fn.Pos(), " (declared here)")
   401  	}
   402  }
   403  
   404  // dumpSolution writes the PTS solution to the specified file.
   405  //
   406  // It only dumps the nodes that existed before solving.  The order in
   407  // which solver-created nodes are created depends on pre-solver
   408  // optimization, so we can't include them in the cross-check.
   409  func (a *analysis) dumpSolution(filename string, N int) {
   410  	f, err := os.Create(filename)
   411  	if err != nil {
   412  		panic(err)
   413  	}
   414  	for id, n := range a.nodes[:N] {
   415  		if _, err := fmt.Fprintf(f, "pts(n%d) = {", id); err != nil {
   416  			panic(err)
   417  		}
   418  		var sep string
   419  		for _, l := range n.solve.pts.AppendTo(a.deltaSpace) {
   420  			if l >= N {
   421  				break
   422  			}
   423  			fmt.Fprintf(f, "%s%d", sep, l)
   424  			sep = " "
   425  		}
   426  		fmt.Fprintf(f, "} : %s\n", n.typ)
   427  	}
   428  	if err := f.Close(); err != nil {
   429  		panic(err)
   430  	}
   431  }
   432  
   433  // showCounts logs the size of the constraint system.  A typical
   434  // optimized distribution is 65% copy, 13% load, 11% addr, 5%
   435  // offsetAddr, 4% store, 2% others.
   436  func (a *analysis) showCounts() {
   437  	if a.log != nil {
   438  		counts := make(map[reflect.Type]int)
   439  		for _, c := range a.constraints {
   440  			counts[reflect.TypeOf(c)]++
   441  		}
   442  		fmt.Fprintf(a.log, "# constraints:\t%d\n", len(a.constraints))
   443  		var lines []string
   444  		for t, n := range counts {
   445  			line := fmt.Sprintf("%7d  (%2d%%)\t%s", n, 100*n/len(a.constraints), t)
   446  			lines = append(lines, line)
   447  		}
   448  		sort.Sort(sort.Reverse(sort.StringSlice(lines)))
   449  		for _, line := range lines {
   450  			fmt.Fprintf(a.log, "\t%s\n", line)
   451  		}
   452  
   453  		fmt.Fprintf(a.log, "# nodes:\t%d\n", len(a.nodes))
   454  
   455  		// Show number of pointer equivalence classes.
   456  		m := make(map[*solverState]bool)
   457  		for _, n := range a.nodes {
   458  			m[n.solve] = true
   459  		}
   460  		fmt.Fprintf(a.log, "# ptsets:\t%d\n", len(m))
   461  	}
   462  }