golang.org/x/tools/gopls@v0.15.3/internal/cache/metadata/graph.go (about)

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package metadata
     6  
     7  import (
     8  	"sort"
     9  
    10  	"golang.org/x/tools/go/packages"
    11  	"golang.org/x/tools/gopls/internal/protocol"
    12  	"golang.org/x/tools/gopls/internal/util/bug"
    13  )
    14  
// A Graph is an immutable and transitively closed graph of [Package] data.
//
// Graphs are built by newGraph, which derives ImportedBy and IDs from
// Packages. Update never modifies its receiver; it returns a new Graph.
type Graph struct {
	// Packages maps package IDs to their associated Packages.
	Packages map[PackageID]*Package

	// ImportedBy maps package IDs to the list of packages that import them.
	// It is the inverse of the DepsByPkgPath edges found in Packages.
	// The lists are built while iterating over a map, so their order is
	// unspecified.
	ImportedBy map[PackageID][]PackageID

	// IDs maps file URIs to the IDs of the packages containing that file
	// (via GoFiles or CompiledGoFiles).
	// A single file may belong to multiple packages due to test packages.
	//
	// As built by newGraph, each list is ordered by package ID with
	// command-line-arguments packages sorted last, and is then trimmed:
	// if the file belongs to any package that is not a
	// command-line-arguments package, only those packages are kept;
	// otherwise only the first command-line-arguments package is kept.
	//
	// Invariant: all IDs present in the IDs map exist in the Packages map.
	IDs map[protocol.DocumentURI][]PackageID
}
    29  
    30  // Update creates a new Graph containing the result of applying the given
    31  // updates to the receiver, though the receiver is not itself mutated. As a
    32  // special case, if updates is empty, Update just returns the receiver.
    33  //
    34  // A nil map value is used to indicate a deletion.
    35  func (g *Graph) Update(updates map[PackageID]*Package) *Graph {
    36  	if len(updates) == 0 {
    37  		// Optimization: since the graph is immutable, we can return the receiver.
    38  		return g
    39  	}
    40  
    41  	// Debugging golang/go#64227, golang/vscode-go#3126:
    42  	// Assert that the existing metadata graph is acyclic.
    43  	if cycle := cyclic(g.Packages); cycle != "" {
    44  		bug.Reportf("metadata is cyclic even before updates: %s", cycle)
    45  	}
    46  	// Assert that the updates contain no self-cycles.
    47  	for id, mp := range updates {
    48  		if mp != nil {
    49  			for _, depID := range mp.DepsByPkgPath {
    50  				if depID == id {
    51  					bug.Reportf("self-cycle in metadata update: %s", id)
    52  				}
    53  			}
    54  		}
    55  	}
    56  
    57  	// Copy pkgs map then apply updates.
    58  	pkgs := make(map[PackageID]*Package, len(g.Packages))
    59  	for id, mp := range g.Packages {
    60  		pkgs[id] = mp
    61  	}
    62  	for id, mp := range updates {
    63  		if mp == nil {
    64  			delete(pkgs, id)
    65  		} else {
    66  			pkgs[id] = mp
    67  		}
    68  	}
    69  
    70  	// Break import cycles involving updated nodes.
    71  	breakImportCycles(pkgs, updates)
    72  
    73  	return newGraph(pkgs)
    74  }
    75  
    76  // newGraph returns a new metadataGraph,
    77  // deriving relations from the specified metadata.
    78  func newGraph(pkgs map[PackageID]*Package) *Graph {
    79  	// Build the import graph.
    80  	importedBy := make(map[PackageID][]PackageID)
    81  	for id, mp := range pkgs {
    82  		for _, depID := range mp.DepsByPkgPath {
    83  			importedBy[depID] = append(importedBy[depID], id)
    84  		}
    85  	}
    86  
    87  	// Collect file associations.
    88  	uriIDs := make(map[protocol.DocumentURI][]PackageID)
    89  	for id, mp := range pkgs {
    90  		uris := map[protocol.DocumentURI]struct{}{}
    91  		for _, uri := range mp.CompiledGoFiles {
    92  			uris[uri] = struct{}{}
    93  		}
    94  		for _, uri := range mp.GoFiles {
    95  			uris[uri] = struct{}{}
    96  		}
    97  		for uri := range uris {
    98  			uriIDs[uri] = append(uriIDs[uri], id)
    99  		}
   100  	}
   101  
   102  	// Sort and filter file associations.
   103  	for uri, ids := range uriIDs {
   104  		sort.Slice(ids, func(i, j int) bool {
   105  			cli := IsCommandLineArguments(ids[i])
   106  			clj := IsCommandLineArguments(ids[j])
   107  			if cli != clj {
   108  				return clj
   109  			}
   110  
   111  			// 2. packages appear in name order.
   112  			return ids[i] < ids[j]
   113  		})
   114  
   115  		// Choose the best IDs for each URI, according to the following rules:
   116  		//  - If there are any valid real packages, choose them.
   117  		//  - Else, choose the first valid command-line-argument package, if it exists.
   118  		//
   119  		// TODO(rfindley): it might be better to track all IDs here, and exclude
   120  		// them later when type checking, but this is the existing behavior.
   121  		for i, id := range ids {
   122  			// If we've seen *anything* prior to command-line arguments package, take
   123  			// it. Note that ids[0] may itself be command-line-arguments.
   124  			if i > 0 && IsCommandLineArguments(id) {
   125  				uriIDs[uri] = ids[:i]
   126  				break
   127  			}
   128  		}
   129  	}
   130  
   131  	return &Graph{
   132  		Packages:   pkgs,
   133  		ImportedBy: importedBy,
   134  		IDs:        uriIDs,
   135  	}
   136  }
   137  
   138  // ReverseReflexiveTransitiveClosure returns a new mapping containing the
   139  // metadata for the specified packages along with any package that
   140  // transitively imports one of them, keyed by ID, including all the initial packages.
   141  func (g *Graph) ReverseReflexiveTransitiveClosure(ids ...PackageID) map[PackageID]*Package {
   142  	seen := make(map[PackageID]*Package)
   143  	var visitAll func([]PackageID)
   144  	visitAll = func(ids []PackageID) {
   145  		for _, id := range ids {
   146  			if seen[id] == nil {
   147  				if mp := g.Packages[id]; mp != nil {
   148  					seen[id] = mp
   149  					visitAll(g.ImportedBy[id])
   150  				}
   151  			}
   152  		}
   153  	}
   154  	visitAll(ids)
   155  	return seen
   156  }
   157  
   158  // breakImportCycles breaks import cycles in the metadata by deleting
   159  // Deps* edges. It modifies only metadata present in the 'updates'
   160  // subset. This function has an internal test.
   161  func breakImportCycles(metadata, updates map[PackageID]*Package) {
   162  	// 'go list' should never report a cycle without flagging it
   163  	// as such, but we're extra cautious since we're combining
   164  	// information from multiple runs of 'go list'. Also, Bazel
   165  	// may silently report cycles.
   166  	cycles := detectImportCycles(metadata, updates)
   167  	if len(cycles) > 0 {
   168  		// There were cycles (uncommon). Break them.
   169  		//
   170  		// The naive way to break cycles would be to perform a
   171  		// depth-first traversal and to detect and delete
   172  		// cycle-forming edges as we encounter them.
   173  		// However, we're not allowed to modify the existing
   174  		// Metadata records, so we can only break edges out of
   175  		// the 'updates' subset.
   176  		//
   177  		// Another possibility would be to delete not the
   178  		// cycle forming edge but the topmost edge on the
   179  		// stack whose tail is an updated node.
   180  		// However, this would require that we retroactively
   181  		// undo all the effects of the traversals that
   182  		// occurred since that edge was pushed on the stack.
   183  		//
   184  		// We use a simpler scheme: we compute the set of cycles.
   185  		// All cyclic paths necessarily involve at least one
   186  		// updated node, so it is sufficient to break all
   187  		// edges from each updated node to other members of
   188  		// the strong component.
   189  		//
   190  		// This may result in the deletion of dominating
   191  		// edges, causing some dependencies to appear
   192  		// spuriously unreachable. Consider A <-> B -> C
   193  		// where updates={A,B}. The cycle is {A,B} so the
   194  		// algorithm will break both A->B and B->A, causing
   195  		// A to no longer depend on B or C.
   196  		//
   197  		// But that's ok: any error in Metadata.Errors is
   198  		// conservatively assumed by snapshot.clone to be a
   199  		// potential import cycle error, and causes special
   200  		// invalidation so that if B later drops its
   201  		// cycle-forming import of A, both A and B will be
   202  		// invalidated.
   203  		for _, cycle := range cycles {
   204  			cyclic := make(map[PackageID]bool)
   205  			for _, mp := range cycle {
   206  				cyclic[mp.ID] = true
   207  			}
   208  			for id := range cyclic {
   209  				if mp := updates[id]; mp != nil {
   210  					for path, depID := range mp.DepsByImpPath {
   211  						if cyclic[depID] {
   212  							delete(mp.DepsByImpPath, path)
   213  						}
   214  					}
   215  					for path, depID := range mp.DepsByPkgPath {
   216  						if cyclic[depID] {
   217  							delete(mp.DepsByPkgPath, path)
   218  						}
   219  					}
   220  
   221  					// Set m.Errors to enable special
   222  					// invalidation logic in snapshot.clone.
   223  					if len(mp.Errors) == 0 {
   224  						mp.Errors = []packages.Error{{
   225  							Msg:  "detected import cycle",
   226  							Kind: packages.ListError,
   227  						}}
   228  					}
   229  				}
   230  			}
   231  		}
   232  
   233  		// double-check when debugging
   234  		if false {
   235  			if cycles := detectImportCycles(metadata, updates); len(cycles) > 0 {
   236  				bug.Reportf("unbroken cycle: %v", cycles)
   237  			}
   238  		}
   239  	}
   240  }
   241  
   242  // cyclic returns a description of a cycle,
   243  // if the graph is cyclic, otherwise "".
   244  func cyclic(graph map[PackageID]*Package) string {
   245  	const (
   246  		unvisited = 0
   247  		visited   = 1
   248  		onstack   = 2
   249  	)
   250  	color := make(map[PackageID]int)
   251  	var visit func(id PackageID) string
   252  	visit = func(id PackageID) string {
   253  		switch color[id] {
   254  		case unvisited:
   255  			color[id] = onstack
   256  		case onstack:
   257  			return string(id) // cycle!
   258  		case visited:
   259  			return ""
   260  		}
   261  		if mp := graph[id]; mp != nil {
   262  			for _, depID := range mp.DepsByPkgPath {
   263  				if cycle := visit(depID); cycle != "" {
   264  					return string(id) + "->" + cycle
   265  				}
   266  			}
   267  		}
   268  		color[id] = visited
   269  		return ""
   270  	}
   271  	for id := range graph {
   272  		if cycle := visit(id); cycle != "" {
   273  			return cycle
   274  		}
   275  	}
   276  	return ""
   277  }
   278  
// detectImportCycles reports cycles in the metadata graph. It returns a new
// unordered array of all cycles (nontrivial strong components) in the
// metadata graph reachable from a non-nil 'updates' value.
//
// Edges are taken from DepsByPkgPath. Each element of the result lists the
// Packages of one strong component that has at least two members. The
// traversal starts from every ID whose value in updates is non-nil, but the
// Package data itself is always looked up in metadata. Neither map nor any
// Package is modified. The result is nil if no cycle is found.
func detectImportCycles(metadata, updates map[PackageID]*Package) [][]*Package {
	// We use the depth-first algorithm of Tarjan.
	// https://doi.org/10.1137/0201010
	//
	// TODO(adonovan): when we can use generics, consider factoring
	// in common with the other implementation of Tarjan (in typerefs),
	// abstracting over the node and edge representation.

	// A node wraps a Metadata with its working state.
	// (Unfortunately we can't intrude on shared Metadata.)
	//
	// Fields:
	//   rep     - parent link in the disjoint-set forest (self if canonical)
	//   mp      - the wrapped Package
	//   index   - depth-first visit number; 0 means unvisited
	//   lowlink - smallest index known to be reachable from this node
	//   scc     - 0: unvisited, -1: on the stack, 1: root of a completed SCC
	type node struct {
		rep            *node
		mp             *Package
		index, lowlink int32
		scc            int8 // TODO(adonovan): opt: cram these 1.5 bits into previous word
	}
	nodes := make(map[PackageID]*node, len(metadata))
	// nodeOf returns the node for id, creating it on first use.
	nodeOf := func(id PackageID) *node {
		n, ok := nodes[id]
		if !ok {
			mp := metadata[id]
			if mp == nil {
				// Dangling import edge.
				// Not sure whether a go/packages driver ever
				// emits this, but create a dummy node in case.
				// Obviously it won't be part of any cycle.
				mp = &Package{ID: id}
			}
			n = &node{mp: mp}
			n.rep = n // a new node is its own representative
			nodes[id] = n
		}
		return n
	}

	// find returns the canonical representative of n's set.
	// (The nodes form a disjoint set forest.)
	var find func(*node) *node
	find = func(n *node) *node {
		rep := n.rep
		if rep != n {
			rep = find(rep)
			n.rep = rep // simple path compression (no union-by-rank)
		}
		return rep
	}

	// global state
	var (
		index int32 = 1    // next visit number; starts at 1 so that 0 means unvisited
		stack []*node      // nodes visited but not yet assigned to a completed SCC
		sccs  [][]*Package // set of nontrivial strongly connected components
	)

	// visit implements the depth-first search of Tarjan's SCC algorithm
	// Precondition: x is canonical.
	var visit func(*node)
	visit = func(x *node) {
		x.index = index
		x.lowlink = index
		index++

		stack = append(stack, x) // push
		x.scc = -1               // mark x as being on the stack

		for _, yid := range x.mp.DepsByPkgPath {
			y := nodeOf(yid)
			// Loop invariant: x is canonical.
			y = find(y)
			if x == y {
				continue // nodes already combined (self-edges are impossible)
			}

			switch {
			case y.scc > 0:
				// y is already a collapsed SCC

			case y.scc < 0:
				// y is on the stack, and thus in the current SCC.
				if y.index < x.lowlink {
					x.lowlink = y.index
				}

			default:
				// y is unvisited; visit it now.
				visit(y)
				// Note: x and y are now non-canonical.
				x = find(x)
				if y.lowlink < x.lowlink {
					x.lowlink = y.lowlink
				}
			}
		}

		// Is x the root of an SCC?
		if x.lowlink == x.index {
			// Gather all metadata in the SCC (if nontrivial).
			var scc []*Package
			for {
				// Pop y from stack.
				i := len(stack) - 1
				y := stack[i]
				stack = stack[:i]
				// Collecting starts only if the first node popped is not x,
				// i.e. the component has more than one member; once started,
				// every further popped node (including x itself) is added.
				if x != y || scc != nil {
					scc = append(scc, y.mp)
				}
				if x == y {
					break // complete
				}
				// x becomes y's canonical representative.
				y.rep = x
			}
			if scc != nil {
				sccs = append(sccs, scc)
			}
			x.scc = 1 // x now stands for a completed SCC
		}
	}

	// Visit only the updated nodes:
	// the existing metadata graph has no cycles,
	// so any new cycle must involve an updated node.
	for id, mp := range updates {
		if mp != nil {
			if n := nodeOf(id); n.index == 0 { // unvisited
				visit(n)
			}
		}
	}

	return sccs
}