github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/tools/go/callgraph/rta/rta14.go (about)

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build !go1.5
     6  
     7  // Package rta provides Rapid Type Analysis (RTA) for Go, a fast
     8  // algorithm for call graph construction and discovery of reachable code
     9  // (and hence dead code) and runtime types.  The algorithm was first
    10  // described in:
    11  //
    12  // David F. Bacon and Peter F. Sweeney. 1996.
    13  // Fast static analysis of C++ virtual function calls. (OOPSLA '96)
    14  // http://doi.acm.org/10.1145/236337.236371
    15  //
    16  // The algorithm uses dynamic programming to tabulate the cross-product
    17  // of the set of known "address taken" functions with the set of known
    18  // dynamic calls of the same type.  As each new address-taken function
    19  // is discovered, call graph edges are added from each known callsite,
    20  // and as each new call site is discovered, call graph edges are added
    21  // from it to each known address-taken function.
    22  //
    23  // A similar approach is used for dynamic calls via interfaces: it
    24  // tabulates the cross-product of the set of known "runtime types",
    25  // i.e. types that may appear in an interface value, or be derived from
    26  // one via reflection, with the set of known "invoke"-mode dynamic
    27  // calls.  As each new "runtime type" is discovered, call edges are
    28  // added from the known call sites, and as each new call site is
    29  // discovered, call graph edges are added to each compatible
    30  // method.
    31  //
    32  // In addition, we must consider all exported methods of any runtime type
    33  // as reachable, since they may be called via reflection.
    34  //
    35  // Each time a newly added call edge causes a new function to become
    36  // reachable, the code of that function is analyzed for more call sites,
    37  // address-taken functions, and runtime types.  The process continues
    38  // until a fixed point is achieved.
    39  //
    40  // The resulting call graph is less precise than one produced by pointer
    41  // analysis, but the algorithm is much faster.  For example, running the
    42  // cmd/callgraph tool on its own source takes ~2.1s for RTA and ~5.4s
    43  // for points-to analysis.
    44  //
    45  package rta // import "golang.org/x/tools/go/callgraph/rta"
    46  
    47  // TODO(adonovan): test it by connecting it to the interpreter and
    48  // replacing all "unreachable" functions by a special intrinsic, and
    49  // ensure that that intrinsic is never called.
    50  
    51  import (
    52  	"fmt"
    53  
    54  	"golang.org/x/tools/go/callgraph"
    55  	"golang.org/x/tools/go/ssa"
    56  	"golang.org/x/tools/go/types"
    57  	"golang.org/x/tools/go/types/typeutil"
    58  )
    59  
// A Result holds the results of Rapid Type Analysis, which includes the
// set of reachable functions/methods, runtime types, and the call graph.
//
// A *Result is produced by Analyze.
type Result struct {
	// CallGraph is the discovered callgraph.
	// It does not include edges for calls made via reflection.
	// It is nil if Analyze was called with buildCallGraph == false.
	CallGraph *callgraph.Graph

	// Reachable contains the set of reachable functions and methods.
	// This includes exported methods of runtime types, since
	// they may be accessed via reflection.
	// The value indicates whether the function is address-taken.
	//
	// (We wrap the bool in a struct to avoid inadvertent use of
	// "if Reachable[f] {" to test for set membership.)
	Reachable map[*ssa.Function]struct{ AddrTaken bool }

	// RuntimeTypes contains the set of types that are needed at
	// runtime, for interfaces or reflection.
	// Keys are types.Type; values are bool.
	//
	// The value indicates whether the type is inaccessible to reflection.
	// Consider:
	// 	type A struct{B}
	// 	fmt.Println(new(A))
	// Types *A, A and B are accessible to reflection, but the unnamed
	// type struct{B} is not.
	RuntimeTypes typeutil.Map
}
    88  
// rta is the working state of the RTA algorithm.
// A single instance is created per call to Analyze.
type rta struct {
	// result accumulates the analysis output that Analyze returns.
	result *Result

	// prog is the program being analyzed, taken from the first root.
	prog *ssa.Program

	// worklist holds functions that have become reachable but whose
	// instructions have not yet been visited.
	worklist []*ssa.Function // list of functions to visit

	// addrTakenFuncsBySig contains all address-taken *Functions, grouped by signature.
	// Keys are *types.Signature, values are map[*ssa.Function]bool sets.
	addrTakenFuncsBySig typeutil.Map

	// dynCallSites contains all dynamic "call"-mode call sites, grouped by signature.
	// Keys are *types.Signature, values are unordered []ssa.CallInstruction.
	dynCallSites typeutil.Map

	// invokeSites contains all "invoke"-mode call sites, grouped by interface.
	// Keys are *types.Interface (never *types.Named),
	// Values are unordered []ssa.CallInstruction sets.
	invokeSites typeutil.Map

	// The following two maps together define the subset of the
	// m:n "implements" relation needed by the algorithm.
	// Each is updated whenever a new key is added to the other.

	// concreteTypes maps each concrete type to the set of interfaces that it implements.
	// Keys are types.Type, values are unordered []*types.Interface.
	// Only concrete types used as MakeInterface operands are included.
	concreteTypes typeutil.Map

	// interfaceTypes maps each interface type to
	// the set of concrete types that implement it.
	// Keys are *types.Interface, values are unordered []types.Type.
	// Only interfaces used in "invoke"-mode CallInstructions are included.
	interfaceTypes typeutil.Map
}
   124  
   125  // addReachable marks a function as potentially callable at run-time,
   126  // and ensures that it gets processed.
   127  func (r *rta) addReachable(f *ssa.Function, addrTaken bool) {
   128  	reachable := r.result.Reachable
   129  	n := len(reachable)
   130  	v := reachable[f]
   131  	if addrTaken {
   132  		v.AddrTaken = true
   133  	}
   134  	reachable[f] = v
   135  	if len(reachable) > n {
   136  		// First time seeing f.  Add it to the worklist.
   137  		r.worklist = append(r.worklist, f)
   138  	}
   139  }
   140  
   141  // addEdge adds the specified call graph edge, and marks it reachable.
   142  // addrTaken indicates whether to mark the callee as "address-taken".
   143  func (r *rta) addEdge(site ssa.CallInstruction, callee *ssa.Function, addrTaken bool) {
   144  	r.addReachable(callee, addrTaken)
   145  
   146  	if g := r.result.CallGraph; g != nil {
   147  		if site.Parent() == nil {
   148  			panic(site)
   149  		}
   150  		from := g.CreateNode(site.Parent())
   151  		to := g.CreateNode(callee)
   152  		callgraph.AddEdge(from, site, to)
   153  	}
   154  }
   155  
   156  // ---------- addrTakenFuncs × dynCallSites ----------
   157  
   158  // visitAddrTakenFunc is called each time we encounter an address-taken function f.
   159  func (r *rta) visitAddrTakenFunc(f *ssa.Function) {
   160  	// Create two-level map (Signature -> Function -> bool).
   161  	S := f.Signature
   162  	funcs, _ := r.addrTakenFuncsBySig.At(S).(map[*ssa.Function]bool)
   163  	if funcs == nil {
   164  		funcs = make(map[*ssa.Function]bool)
   165  		r.addrTakenFuncsBySig.Set(S, funcs)
   166  	}
   167  	if !funcs[f] {
   168  		// First time seeing f.
   169  		funcs[f] = true
   170  
   171  		// If we've seen any dyncalls of this type, mark it reachable,
   172  		// and add call graph edges.
   173  		sites, _ := r.dynCallSites.At(S).([]ssa.CallInstruction)
   174  		for _, site := range sites {
   175  			r.addEdge(site, f, true)
   176  		}
   177  	}
   178  }
   179  
   180  // visitDynCall is called each time we encounter a dynamic "call"-mode call.
   181  func (r *rta) visitDynCall(site ssa.CallInstruction) {
   182  	S := site.Common().Signature()
   183  
   184  	// Record the call site.
   185  	sites, _ := r.dynCallSites.At(S).([]ssa.CallInstruction)
   186  	r.dynCallSites.Set(S, append(sites, site))
   187  
   188  	// For each function of signature S that we know is address-taken,
   189  	// mark it reachable.  We'll add the callgraph edges later.
   190  	funcs, _ := r.addrTakenFuncsBySig.At(S).(map[*ssa.Function]bool)
   191  	for g := range funcs {
   192  		r.addEdge(site, g, true)
   193  	}
   194  }
   195  
   196  // ---------- concrete types × invoke sites ----------
   197  
   198  // addInvokeEdge is called for each new pair (site, C) in the matrix.
   199  func (r *rta) addInvokeEdge(site ssa.CallInstruction, C types.Type) {
   200  	// Ascertain the concrete method of C to be called.
   201  	imethod := site.Common().Method
   202  	cmethod := r.prog.MethodValue(r.prog.MethodSets.MethodSet(C).Lookup(imethod.Pkg(), imethod.Name()))
   203  	r.addEdge(site, cmethod, true)
   204  }
   205  
   206  // visitInvoke is called each time the algorithm encounters an "invoke"-mode call.
   207  func (r *rta) visitInvoke(site ssa.CallInstruction) {
   208  	I := site.Common().Value.Type().Underlying().(*types.Interface)
   209  
   210  	// Record the invoke site.
   211  	sites, _ := r.invokeSites.At(I).([]ssa.CallInstruction)
   212  	r.invokeSites.Set(I, append(sites, site))
   213  
   214  	// Add callgraph edge for each existing
   215  	// address-taken concrete type implementing I.
   216  	for _, C := range r.implementations(I) {
   217  		r.addInvokeEdge(site, C)
   218  	}
   219  }
   220  
   221  // ---------- main algorithm ----------
   222  
   223  // visitFunc processes function f.
   224  func (r *rta) visitFunc(f *ssa.Function) {
   225  	var space [32]*ssa.Value // preallocate space for common case
   226  
   227  	for _, b := range f.Blocks {
   228  		for _, instr := range b.Instrs {
   229  			rands := instr.Operands(space[:0])
   230  
   231  			switch instr := instr.(type) {
   232  			case ssa.CallInstruction:
   233  				call := instr.Common()
   234  				if call.IsInvoke() {
   235  					r.visitInvoke(instr)
   236  				} else if g := call.StaticCallee(); g != nil {
   237  					r.addEdge(instr, g, false)
   238  				} else if _, ok := call.Value.(*ssa.Builtin); !ok {
   239  					r.visitDynCall(instr)
   240  				}
   241  
   242  				// Ignore the call-position operand when
   243  				// looking for address-taken Functions.
   244  				// Hack: assume this is rands[0].
   245  				rands = rands[1:]
   246  
   247  			case *ssa.MakeInterface:
   248  				r.addRuntimeType(instr.X.Type(), false)
   249  			}
   250  
   251  			// Process all address-taken functions.
   252  			for _, op := range rands {
   253  				if g, ok := (*op).(*ssa.Function); ok {
   254  					r.visitAddrTakenFunc(g)
   255  				}
   256  			}
   257  		}
   258  	}
   259  }
   260  
   261  // Analyze performs Rapid Type Analysis, starting at the specified root
   262  // functions.  It returns nil if no roots were specified.
   263  //
   264  // If buildCallGraph is true, Result.CallGraph will contain a call
   265  // graph; otherwise, only the other fields (reachable functions) are
   266  // populated.
   267  //
   268  func Analyze(roots []*ssa.Function, buildCallGraph bool) *Result {
   269  	if len(roots) == 0 {
   270  		return nil
   271  	}
   272  
   273  	r := &rta{
   274  		result: &Result{Reachable: make(map[*ssa.Function]struct{ AddrTaken bool })},
   275  		prog:   roots[0].Prog,
   276  	}
   277  
   278  	if buildCallGraph {
   279  		// TODO(adonovan): change callgraph API to eliminate the
   280  		// notion of a distinguished root node.  Some callgraphs
   281  		// have many roots, or none.
   282  		r.result.CallGraph = callgraph.New(roots[0])
   283  	}
   284  
   285  	hasher := typeutil.MakeHasher()
   286  	r.result.RuntimeTypes.SetHasher(hasher)
   287  	r.addrTakenFuncsBySig.SetHasher(hasher)
   288  	r.dynCallSites.SetHasher(hasher)
   289  	r.invokeSites.SetHasher(hasher)
   290  	r.concreteTypes.SetHasher(hasher)
   291  	r.interfaceTypes.SetHasher(hasher)
   292  
   293  	// Visit functions, processing their instructions, and adding
   294  	// new functions to the worklist, until a fixed point is
   295  	// reached.
   296  	var shadow []*ssa.Function // for efficiency, we double-buffer the worklist
   297  	r.worklist = append(r.worklist, roots...)
   298  	for len(r.worklist) > 0 {
   299  		shadow, r.worklist = r.worklist, shadow[:0]
   300  		for _, f := range shadow {
   301  			r.visitFunc(f)
   302  		}
   303  	}
   304  	return r.result
   305  }
   306  
   307  // interfaces(C) returns all currently known interfaces implemented by C.
   308  func (r *rta) interfaces(C types.Type) []*types.Interface {
   309  	// Ascertain set of interfaces C implements
   310  	// and update 'implements' relation.
   311  	var ifaces []*types.Interface
   312  	r.interfaceTypes.Iterate(func(I types.Type, concs interface{}) {
   313  		if I := I.(*types.Interface); types.Implements(C, I) {
   314  			concs, _ := concs.([]types.Type)
   315  			r.interfaceTypes.Set(I, append(concs, C))
   316  			ifaces = append(ifaces, I)
   317  		}
   318  	})
   319  	r.concreteTypes.Set(C, ifaces)
   320  	return ifaces
   321  }
   322  
   323  // implementations(I) returns all currently known concrete types that implement I.
   324  func (r *rta) implementations(I *types.Interface) []types.Type {
   325  	var concs []types.Type
   326  	if v := r.interfaceTypes.At(I); v != nil {
   327  		concs = v.([]types.Type)
   328  	} else {
   329  		// First time seeing this interface.
   330  		// Update the 'implements' relation.
   331  		r.concreteTypes.Iterate(func(C types.Type, ifaces interface{}) {
   332  			if types.Implements(C, I) {
   333  				ifaces, _ := ifaces.([]*types.Interface)
   334  				r.concreteTypes.Set(C, append(ifaces, I))
   335  				concs = append(concs, C)
   336  			}
   337  		})
   338  		r.interfaceTypes.Set(I, concs)
   339  	}
   340  	return concs
   341  }
   342  
   343  // addRuntimeType is called for each concrete type that can be the
   344  // dynamic type of some interface or reflect.Value.
   345  // Adapted from needMethods in go/ssa/builder.go
   346  //
   347  func (r *rta) addRuntimeType(T types.Type, skip bool) {
   348  	if prev, ok := r.result.RuntimeTypes.At(T).(bool); ok {
   349  		if skip && !prev {
   350  			r.result.RuntimeTypes.Set(T, skip)
   351  		}
   352  		return
   353  	}
   354  	r.result.RuntimeTypes.Set(T, skip)
   355  
   356  	mset := r.prog.MethodSets.MethodSet(T)
   357  
   358  	if _, ok := T.Underlying().(*types.Interface); !ok {
   359  		// T is a new concrete type.
   360  		for i, n := 0, mset.Len(); i < n; i++ {
   361  			sel := mset.At(i)
   362  			m := sel.Obj()
   363  
   364  			if m.Exported() {
   365  				// Exported methods are always potentially callable via reflection.
   366  				r.addReachable(r.prog.MethodValue(sel), true)
   367  			}
   368  		}
   369  
   370  		// Add callgraph edge for each existing dynamic
   371  		// "invoke"-mode call via that interface.
   372  		for _, I := range r.interfaces(T) {
   373  			sites, _ := r.invokeSites.At(I).([]ssa.CallInstruction)
   374  			for _, site := range sites {
   375  				r.addInvokeEdge(site, T)
   376  			}
   377  		}
   378  	}
   379  
   380  	// Precondition: T is not a method signature (*Signature with Recv()!=nil).
   381  	// Recursive case: skip => don't call makeMethods(T).
   382  	// Each package maintains its own set of types it has visited.
   383  
   384  	var n *types.Named
   385  	switch T := T.(type) {
   386  	case *types.Named:
   387  		n = T
   388  	case *types.Pointer:
   389  		n, _ = T.Elem().(*types.Named)
   390  	}
   391  	if n != nil {
   392  		owner := n.Obj().Pkg()
   393  		if owner == nil {
   394  			return // built-in error type
   395  		}
   396  	}
   397  
   398  	// Recursion over signatures of each exported method.
   399  	for i := 0; i < mset.Len(); i++ {
   400  		if mset.At(i).Obj().Exported() {
   401  			sig := mset.At(i).Type().(*types.Signature)
   402  			r.addRuntimeType(sig.Params(), true)  // skip the Tuple itself
   403  			r.addRuntimeType(sig.Results(), true) // skip the Tuple itself
   404  		}
   405  	}
   406  
   407  	switch t := T.(type) {
   408  	case *types.Basic:
   409  		// nop
   410  
   411  	case *types.Interface:
   412  		// nop---handled by recursion over method set.
   413  
   414  	case *types.Pointer:
   415  		r.addRuntimeType(t.Elem(), false)
   416  
   417  	case *types.Slice:
   418  		r.addRuntimeType(t.Elem(), false)
   419  
   420  	case *types.Chan:
   421  		r.addRuntimeType(t.Elem(), false)
   422  
   423  	case *types.Map:
   424  		r.addRuntimeType(t.Key(), false)
   425  		r.addRuntimeType(t.Elem(), false)
   426  
   427  	case *types.Signature:
   428  		if t.Recv() != nil {
   429  			panic(fmt.Sprintf("Signature %s has Recv %s", t, t.Recv()))
   430  		}
   431  		r.addRuntimeType(t.Params(), true)  // skip the Tuple itself
   432  		r.addRuntimeType(t.Results(), true) // skip the Tuple itself
   433  
   434  	case *types.Named:
   435  		// A pointer-to-named type can be derived from a named
   436  		// type via reflection.  It may have methods too.
   437  		r.addRuntimeType(types.NewPointer(T), false)
   438  
   439  		// Consider 'type T struct{S}' where S has methods.
   440  		// Reflection provides no way to get from T to struct{S},
   441  		// only to S, so the method set of struct{S} is unwanted,
   442  		// so set 'skip' flag during recursion.
   443  		r.addRuntimeType(t.Underlying(), true)
   444  
   445  	case *types.Array:
   446  		r.addRuntimeType(t.Elem(), false)
   447  
   448  	case *types.Struct:
   449  		for i, n := 0, t.NumFields(); i < n; i++ {
   450  			r.addRuntimeType(t.Field(i).Type(), false)
   451  		}
   452  
   453  	case *types.Tuple:
   454  		for i, n := 0, t.Len(); i < n; i++ {
   455  			r.addRuntimeType(t.At(i).Type(), false)
   456  		}
   457  
   458  	default:
   459  		panic(T)
   460  	}
   461  }