github.com/v2fly/tools@v0.100.0/go/pointer/analysis.go

github.com/v2fly/tools@v0.100.0/go/pointer/analysis.go (about)

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package pointer
     6  
     7  // This file defines the main datatypes and Analyze function of the pointer analysis.
     8  
     9  import (
    10  	"fmt"
    11  	"go/token"
    12  	"go/types"
    13  	"io"
    14  	"os"
    15  	"reflect"
    16  	"runtime"
    17  	"runtime/debug"
    18  	"sort"
    19  
    20  	"github.com/v2fly/tools/go/callgraph"
    21  	"github.com/v2fly/tools/go/ssa"
    22  	"github.com/v2fly/tools/go/types/typeutil"
    23  )
    24  
    25  const (
    26  	// optimization options; enable all when committing
    27  	optRenumber = true // enable renumbering optimization (makes logs hard to read)
    28  	optHVN      = true // enable pointer equivalence via Hash-Value Numbering
    29  
    30  	// debugging options; disable all when committing
    31  	debugHVN           = false // enable assertions in HVN
    32  	debugHVNVerbose    = false // enable extra HVN logging
    33  	debugHVNCrossCheck = false // run solver with/without HVN and compare (caveats below)
    34  	debugTimers        = false // show running time of each phase
    35  )
    36  
    37  // object.flags bitmask values.
    38  const (
    39  	otTagged   = 1 << iota // type-tagged object
    40  	otIndirect             // type-tagged object with indirect payload
    41  	otFunction             // function object
    42  )
    43  
    44  // An object represents a contiguous block of memory to which some
    45  // (generalized) pointer may point.
    46  //
    47  // (Note: most variables called 'obj' are not *objects but nodeids
    48  // such that a.nodes[obj].obj != nil.)
    49  //
    50  type object struct {
    51  	// flags is a bitset of the node type (ot*) flags defined above.
    52  	flags uint32
    53  
    54  	// Number of following nodes belonging to the same "object"
    55  	// allocation.  Zero for all other nodes.
    56  	size uint32
    57  
    58  	// data describes this object; it has one of these types:
    59  	//
    60  	// ssa.Value	for an object allocated by an SSA operation.
    61  	// types.Type	for an rtype instance object or *rtype-tagged object.
    62  	// string	for an instrinsic object, e.g. the array behind os.Args.
    63  	// nil		for an object allocated by an instrinsic.
    64  	//		(cgn provides the identity of the intrinsic.)
    65  	data interface{}
    66  
    67  	// The call-graph node (=context) in which this object was allocated.
    68  	// May be nil for global objects: Global, Const, some Functions.
    69  	cgn *cgnode
    70  }
    71  
    72  // nodeid denotes a node.
    73  // It is an index within analysis.nodes.
    74  // We use small integers, not *node pointers, for many reasons:
    75  // - they are smaller on 64-bit systems.
    76  // - sets of them can be represented compactly in bitvectors or BDDs.
    77  // - order matters; a field offset can be computed by simple addition.
    78  type nodeid uint32
    79  
    80  // A node is an equivalence class of memory locations.
    81  // Nodes may be pointers, pointed-to locations, neither, or both.
    82  //
    83  // Nodes that are pointed-to locations ("labels") have an enclosing
    84  // object (see analysis.enclosingObject).
    85  //
    86  type node struct {
    87  	// If non-nil, this node is the start of an object
    88  	// (addressable memory location).
    89  	// The following obj.size nodes implicitly belong to the object;
    90  	// they locate their object by scanning back.
    91  	obj *object
    92  
    93  	// The type of the field denoted by this node.  Non-aggregate,
    94  	// unless this is an tagged.T node (i.e. the thing
    95  	// pointed to by an interface) in which case typ is that type.
    96  	typ types.Type
    97  
    98  	// subelement indicates which directly embedded subelement of
    99  	// an object of aggregate type (struct, tuple, array) this is.
   100  	subelement *fieldInfo // e.g. ".a.b[*].c"
   101  
   102  	// Solver state for the canonical node of this pointer-
   103  	// equivalence class.  Each node is created with its own state
   104  	// but they become shared after HVN.
   105  	solve *solverState
   106  }
   107  
   108  // An analysis instance holds the state of a single pointer analysis problem.
   109  type analysis struct {
   110  	config      *Config                     // the client's control/observer interface
   111  	prog        *ssa.Program                // the program being analyzed
   112  	log         io.Writer                   // log stream; nil to disable
   113  	panicNode   nodeid                      // sink for panic, source for recover
   114  	nodes       []*node                     // indexed by nodeid
   115  	flattenMemo map[types.Type][]*fieldInfo // memoization of flatten()
   116  	trackTypes  map[types.Type]bool         // memoization of shouldTrack()
   117  	constraints []constraint                // set of constraints
   118  	cgnodes     []*cgnode                   // all cgnodes
   119  	genq        []*cgnode                   // queue of functions to generate constraints for
   120  	intrinsics  map[*ssa.Function]intrinsic // non-nil values are summaries for intrinsic fns
   121  	globalval   map[ssa.Value]nodeid        // node for each global ssa.Value
   122  	globalobj   map[ssa.Value]nodeid        // maps v to sole member of pts(v), if singleton
   123  	localval    map[ssa.Value]nodeid        // node for each local ssa.Value
   124  	localobj    map[ssa.Value]nodeid        // maps v to sole member of pts(v), if singleton
   125  	atFuncs     map[*ssa.Function]bool      // address-taken functions (for presolver)
   126  	mapValues   []nodeid                    // values of makemap objects (indirect in HVN)
   127  	work        nodeset                     // solver's worklist
   128  	result      *Result                     // results of the analysis
   129  	track       track                       // pointerlike types whose aliasing we track
   130  	deltaSpace  []int                       // working space for iterating over PTS deltas
   131  
   132  	// Reflection & intrinsics:
   133  	hasher              typeutil.Hasher // cache of type hashes
   134  	reflectValueObj     types.Object    // type symbol for reflect.Value (if present)
   135  	reflectValueCall    *ssa.Function   // (reflect.Value).Call
   136  	reflectRtypeObj     types.Object    // *types.TypeName for reflect.rtype (if present)
   137  	reflectRtypePtr     *types.Pointer  // *reflect.rtype
   138  	reflectType         *types.Named    // reflect.Type
   139  	rtypes              typeutil.Map    // nodeid of canonical *rtype-tagged object for type T
   140  	reflectZeros        typeutil.Map    // nodeid of canonical T-tagged object for zero value
   141  	runtimeSetFinalizer *ssa.Function   // runtime.SetFinalizer
   142  }
   143  
   144  // enclosingObj returns the first node of the addressable memory
   145  // object that encloses node id.  Panic ensues if that node does not
   146  // belong to any object.
   147  func (a *analysis) enclosingObj(id nodeid) nodeid {
   148  	// Find previous node with obj != nil.
   149  	for i := id; i >= 0; i-- {
   150  		n := a.nodes[i]
   151  		if obj := n.obj; obj != nil {
   152  			if i+nodeid(obj.size) <= id {
   153  				break // out of bounds
   154  			}
   155  			return i
   156  		}
   157  	}
   158  	panic("node has no enclosing object")
   159  }
   160  
   161  // labelFor returns the Label for node id.
   162  // Panic ensues if that node is not addressable.
   163  func (a *analysis) labelFor(id nodeid) *Label {
   164  	return &Label{
   165  		obj:        a.nodes[a.enclosingObj(id)].obj,
   166  		subelement: a.nodes[id].subelement,
   167  	}
   168  }
   169  
   170  func (a *analysis) warnf(pos token.Pos, format string, args ...interface{}) {
   171  	msg := fmt.Sprintf(format, args...)
   172  	if a.log != nil {
   173  		fmt.Fprintf(a.log, "%s: warning: %s\n", a.prog.Fset.Position(pos), msg)
   174  	}
   175  	a.result.Warnings = append(a.result.Warnings, Warning{pos, msg})
   176  }
   177  
   178  // computeTrackBits sets a.track to the necessary 'track' bits for the pointer queries.
   179  func (a *analysis) computeTrackBits() {
   180  	if len(a.config.extendedQueries) != 0 {
   181  		// TODO(dh): only track the types necessary for the query.
   182  		a.track = trackAll
   183  		return
   184  	}
   185  	var queryTypes []types.Type
   186  	for v := range a.config.Queries {
   187  		queryTypes = append(queryTypes, v.Type())
   188  	}
   189  	for v := range a.config.IndirectQueries {
   190  		queryTypes = append(queryTypes, mustDeref(v.Type()))
   191  	}
   192  	for _, t := range queryTypes {
   193  		switch t.Underlying().(type) {
   194  		case *types.Chan:
   195  			a.track |= trackChan
   196  		case *types.Map:
   197  			a.track |= trackMap
   198  		case *types.Pointer:
   199  			a.track |= trackPtr
   200  		case *types.Slice:
   201  			a.track |= trackSlice
   202  		case *types.Interface:
   203  			a.track = trackAll
   204  			return
   205  		}
   206  		if rVObj := a.reflectValueObj; rVObj != nil && types.Identical(t, rVObj.Type()) {
   207  			a.track = trackAll
   208  			return
   209  		}
   210  	}
   211  }
   212  
   213  // Analyze runs the pointer analysis with the scope and options
   214  // specified by config, and returns the (synthetic) root of the callgraph.
   215  //
   216  // Pointer analysis of a transitively closed well-typed program should
   217  // always succeed.  An error can occur only due to an internal bug.
   218  //
   219  func Analyze(config *Config) (result *Result, err error) {
   220  	if config.Mains == nil {
   221  		return nil, fmt.Errorf("no main/test packages to analyze (check $GOROOT/$GOPATH)")
   222  	}
   223  	defer func() {
   224  		if p := recover(); p != nil {
   225  			err = fmt.Errorf("internal error in pointer analysis: %v (please report this bug)", p)
   226  			fmt.Fprintln(os.Stderr, "Internal panic in pointer analysis:")
   227  			debug.PrintStack()
   228  		}
   229  	}()
   230  
   231  	a := &analysis{
   232  		config:      config,
   233  		log:         config.Log,
   234  		prog:        config.prog(),
   235  		globalval:   make(map[ssa.Value]nodeid),
   236  		globalobj:   make(map[ssa.Value]nodeid),
   237  		flattenMemo: make(map[types.Type][]*fieldInfo),
   238  		trackTypes:  make(map[types.Type]bool),
   239  		atFuncs:     make(map[*ssa.Function]bool),
   240  		hasher:      typeutil.MakeHasher(),
   241  		intrinsics:  make(map[*ssa.Function]intrinsic),
   242  		result: &Result{
   243  			Queries:         make(map[ssa.Value]Pointer),
   244  			IndirectQueries: make(map[ssa.Value]Pointer),
   245  		},
   246  		deltaSpace: make([]int, 0, 100),
   247  	}
   248  
   249  	if false {
   250  		a.log = os.Stderr // for debugging crashes; extremely verbose
   251  	}
   252  
   253  	if a.log != nil {
   254  		fmt.Fprintln(a.log, "==== Starting analysis")
   255  	}
   256  
   257  	// Pointer analysis requires a complete program for soundness.
   258  	// Check to prevent accidental misconfiguration.
   259  	for _, pkg := range a.prog.AllPackages() {
   260  		// (This only checks that the package scope is complete,
   261  		// not that func bodies exist, but it's a good signal.)
   262  		if !pkg.Pkg.Complete() {
   263  			return nil, fmt.Errorf(`pointer analysis requires a complete program yet package %q was incomplete`, pkg.Pkg.Path())
   264  		}
   265  	}
   266  
   267  	if reflect := a.prog.ImportedPackage("reflect"); reflect != nil {
   268  		rV := reflect.Pkg.Scope().Lookup("Value")
   269  		a.reflectValueObj = rV
   270  		a.reflectValueCall = a.prog.LookupMethod(rV.Type(), nil, "Call")
   271  		a.reflectType = reflect.Pkg.Scope().Lookup("Type").Type().(*types.Named)
   272  		a.reflectRtypeObj = reflect.Pkg.Scope().Lookup("rtype")
   273  		a.reflectRtypePtr = types.NewPointer(a.reflectRtypeObj.Type())
   274  
   275  		// Override flattening of reflect.Value, treating it like a basic type.
   276  		tReflectValue := a.reflectValueObj.Type()
   277  		a.flattenMemo[tReflectValue] = []*fieldInfo{{typ: tReflectValue}}
   278  
   279  		// Override shouldTrack of reflect.Value and *reflect.rtype.
   280  		// Always track pointers of these types.
   281  		a.trackTypes[tReflectValue] = true
   282  		a.trackTypes[a.reflectRtypePtr] = true
   283  
   284  		a.rtypes.SetHasher(a.hasher)
   285  		a.reflectZeros.SetHasher(a.hasher)
   286  	}
   287  	if runtime := a.prog.ImportedPackage("runtime"); runtime != nil {
   288  		a.runtimeSetFinalizer = runtime.Func("SetFinalizer")
   289  	}
   290  	a.computeTrackBits()
   291  
   292  	a.generate()
   293  	a.showCounts()
   294  
   295  	if optRenumber {
   296  		a.renumber()
   297  	}
   298  
   299  	N := len(a.nodes) // excludes solver-created nodes
   300  
   301  	if optHVN {
   302  		if debugHVNCrossCheck {
   303  			// Cross-check: run the solver once without
   304  			// optimization, once with, and compare the
   305  			// solutions.
   306  			savedConstraints := a.constraints
   307  
   308  			a.solve()
   309  			a.dumpSolution("A.pts", N)
   310  
   311  			// Restore.
   312  			a.constraints = savedConstraints
   313  			for _, n := range a.nodes {
   314  				n.solve = new(solverState)
   315  			}
   316  			a.nodes = a.nodes[:N]
   317  
   318  			// rtypes is effectively part of the solver state.
   319  			a.rtypes = typeutil.Map{}
   320  			a.rtypes.SetHasher(a.hasher)
   321  		}
   322  
   323  		a.hvn()
   324  	}
   325  
   326  	if debugHVNCrossCheck {
   327  		runtime.GC()
   328  		runtime.GC()
   329  	}
   330  
   331  	a.solve()
   332  
   333  	// Compare solutions.
   334  	if optHVN && debugHVNCrossCheck {
   335  		a.dumpSolution("B.pts", N)
   336  
   337  		if !diff("A.pts", "B.pts") {
   338  			return nil, fmt.Errorf("internal error: optimization changed solution")
   339  		}
   340  	}
   341  
   342  	// Create callgraph.Nodes in deterministic order.
   343  	if cg := a.result.CallGraph; cg != nil {
   344  		for _, caller := range a.cgnodes {
   345  			cg.CreateNode(caller.fn)
   346  		}
   347  	}
   348  
   349  	// Add dynamic edges to call graph.
   350  	var space [100]int
   351  	for _, caller := range a.cgnodes {
   352  		for _, site := range caller.sites {
   353  			for _, callee := range a.nodes[site.targets].solve.pts.AppendTo(space[:0]) {
   354  				a.callEdge(caller, site, nodeid(callee))
   355  			}
   356  		}
   357  	}
   358  
   359  	return a.result, nil
   360  }
   361  
   362  // callEdge is called for each edge in the callgraph.
   363  // calleeid is the callee's object node (has otFunction flag).
   364  //
   365  func (a *analysis) callEdge(caller *cgnode, site *callsite, calleeid nodeid) {
   366  	obj := a.nodes[calleeid].obj
   367  	if obj.flags&otFunction == 0 {
   368  		panic(fmt.Sprintf("callEdge %s -> n%d: not a function object", site, calleeid))
   369  	}
   370  	callee := obj.cgn
   371  
   372  	if cg := a.result.CallGraph; cg != nil {
   373  		// TODO(adonovan): opt: I would expect duplicate edges
   374  		// (to wrappers) to arise due to the elimination of
   375  		// context information, but I haven't observed any.
   376  		// Understand this better.
   377  		callgraph.AddEdge(cg.CreateNode(caller.fn), site.instr, cg.CreateNode(callee.fn))
   378  	}
   379  
   380  	if a.log != nil {
   381  		fmt.Fprintf(a.log, "\tcall edge %s -> %s\n", site, callee)
   382  	}
   383  
   384  	// Warn about calls to non-intrinsic external functions.
   385  	// TODO(adonovan): de-dup these messages.
   386  	if fn := callee.fn; fn.Blocks == nil && a.findIntrinsic(fn) == nil {
   387  		a.warnf(site.pos(), "unsound call to unknown intrinsic: %s", fn)
   388  		a.warnf(fn.Pos(), " (declared here)")
   389  	}
   390  }
   391  
   392  // dumpSolution writes the PTS solution to the specified file.
   393  //
   394  // It only dumps the nodes that existed before solving.  The order in
   395  // which solver-created nodes are created depends on pre-solver
   396  // optimization, so we can't include them in the cross-check.
   397  //
   398  func (a *analysis) dumpSolution(filename string, N int) {
   399  	f, err := os.Create(filename)
   400  	if err != nil {
   401  		panic(err)
   402  	}
   403  	for id, n := range a.nodes[:N] {
   404  		if _, err := fmt.Fprintf(f, "pts(n%d) = {", id); err != nil {
   405  			panic(err)
   406  		}
   407  		var sep string
   408  		for _, l := range n.solve.pts.AppendTo(a.deltaSpace) {
   409  			if l >= N {
   410  				break
   411  			}
   412  			fmt.Fprintf(f, "%s%d", sep, l)
   413  			sep = " "
   414  		}
   415  		fmt.Fprintf(f, "} : %s\n", n.typ)
   416  	}
   417  	if err := f.Close(); err != nil {
   418  		panic(err)
   419  	}
   420  }
   421  
   422  // showCounts logs the size of the constraint system.  A typical
   423  // optimized distribution is 65% copy, 13% load, 11% addr, 5%
   424  // offsetAddr, 4% store, 2% others.
   425  //
   426  func (a *analysis) showCounts() {
   427  	if a.log != nil {
   428  		counts := make(map[reflect.Type]int)
   429  		for _, c := range a.constraints {
   430  			counts[reflect.TypeOf(c)]++
   431  		}
   432  		fmt.Fprintf(a.log, "# constraints:\t%d\n", len(a.constraints))
   433  		var lines []string
   434  		for t, n := range counts {
   435  			line := fmt.Sprintf("%7d  (%2d%%)\t%s", n, 100*n/len(a.constraints), t)
   436  			lines = append(lines, line)
   437  		}
   438  		sort.Sort(sort.Reverse(sort.StringSlice(lines)))
   439  		for _, line := range lines {
   440  			fmt.Fprintf(a.log, "\t%s\n", line)
   441  		}
   442  
   443  		fmt.Fprintf(a.log, "# nodes:\t%d\n", len(a.nodes))
   444  
   445  		// Show number of pointer equivalence classes.
   446  		m := make(map[*solverState]bool)
   447  		for _, n := range a.nodes {
   448  			m[n.solve] = true
   449  		}
   450  		fmt.Fprintf(a.log, "# ptsets:\t%d\n", len(m))
   451  	}
   452  }