cuelang.org/go@v0.13.0/internal/golangorgx/gopls/cache/metadata/graph.go (about)

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package metadata
     6  
     7  import (
     8  	"maps"
     9  	"sort"
    10  
    11  	"cuelang.org/go/cue/build"
    12  	"cuelang.org/go/internal/golangorgx/gopls/protocol"
    13  	"cuelang.org/go/internal/golangorgx/gopls/util/bug"
    14  	"golang.org/x/tools/go/packages"
    15  )
    16  
// A Graph is an immutable and transitively closed graph of package data,
// keyed by import path.
type Graph struct {
	// Packages maps package import paths to their associated build instances.
	Packages map[ImportPath]*build.Instance

	// ImportedBy maps package import paths to the list of packages that
	// import them. Note this is direct imports only; not transitive.
	// As built by newGraph, the order of each list is unspecified.
	ImportedBy map[ImportPath][]ImportPath

	// FilesToPackage maps file URIs to package import paths. As built by
	// newGraph, each list is sorted by (cli, import path), where cli
	// reports whether the package is a command-line-arguments package, and
	// command-line-arguments entries are dropped when any other entry
	// precedes them. A single file may belong to multiple packages due to
	// ancestor package import.
	//
	// Invariant: all ImportPaths present in the FilesToPackage map
	// exist in the Packages map.
	FilesToPackage map[protocol.DocumentURI][]ImportPath
}
    34  
    35  // Update creates a new Graph containing the result of applying the given
    36  // updates to the receiver, though the receiver is not itself mutated. As a
    37  // special case, if updates is empty, Update just returns the receiver.
    38  //
    39  // A nil map value is used to indicate a deletion.
    40  func (g *Graph) Update(updates map[ImportPath]*build.Instance) *Graph {
    41  	if len(updates) == 0 {
    42  		// Optimization: since the graph is immutable, we can return the receiver.
    43  		return g
    44  	}
    45  
    46  	// Debugging golang/go#64227, golang/vscode-go#3126:
    47  	// Assert that the existing metadata graph is acyclic.
    48  	if cycle := cyclic(g.Packages); cycle != "" {
    49  		bug.Reportf("metadata is cyclic even before updates: %s", cycle)
    50  	}
    51  	// Assert that the updates contain no self-cycles.
    52  	for path, inst := range updates {
    53  		if inst != nil {
    54  			for _, importedInst := range inst.Imports {
    55  				if importedInst == inst {
    56  					bug.Reportf("self-cycle in metadata update: %s", path)
    57  				}
    58  			}
    59  		}
    60  	}
    61  
    62  	// Copy pkgs map then apply updates.
    63  	pkgs := maps.Clone(g.Packages)
    64  	if pkgs == nil {
    65  		pkgs = make(map[ImportPath]*build.Instance)
    66  	}
    67  	for path, inst := range updates {
    68  		if inst == nil {
    69  			delete(pkgs, path)
    70  		} else {
    71  			pkgs[path] = inst
    72  		}
    73  	}
    74  
    75  	// Break import cycles involving updated nodes.
    76  	// TODO(ms): figure out if we need anything like this
    77  	//breakImportCycles(pkgs, updates)
    78  
    79  	return newGraph(pkgs)
    80  }
    81  
    82  // newGraph returns a new metadataGraph,
    83  // deriving relations from the specified metadata.
    84  func newGraph(pkgs map[ImportPath]*build.Instance) *Graph {
    85  	// Build the import graph.
    86  	importedBy := make(map[ImportPath][]ImportPath)
    87  	for path, inst := range pkgs {
    88  		for _, importedInst := range inst.Imports {
    89  			importedPath := ImportPath(importedInst.ImportPath)
    90  			importedBy[importedPath] = append(importedBy[importedPath], path)
    91  		}
    92  	}
    93  
    94  	// Collect file associations.
    95  	filesToPkg := make(map[protocol.DocumentURI][]ImportPath)
    96  	for pkgImportPath, inst := range pkgs {
    97  		files := map[protocol.DocumentURI]struct{}{}
    98  		for _, file := range inst.BuildFiles {
    99  			files[protocol.URIFromPath(file.Filename)] = struct{}{}
   100  		}
   101  		for file := range files {
   102  			filesToPkg[file] = append(filesToPkg[file], pkgImportPath)
   103  		}
   104  	}
   105  
   106  	// Sort and filter file associations.
   107  	for file, pkgImportPaths := range filesToPkg {
   108  		sort.Slice(pkgImportPaths, func(i, j int) bool {
   109  			cli := IsCommandLineArguments(pkgImportPaths[i])
   110  			clj := IsCommandLineArguments(pkgImportPaths[j])
   111  			if cli != clj {
   112  				return clj
   113  			}
   114  
   115  			// 2. packages appear in name order.
   116  			return pkgImportPaths[i] < pkgImportPaths[j]
   117  		})
   118  
   119  		// Choose the best IDs for each URI, according to the following rules:
   120  		//  - If there are any valid real packages, choose them.
   121  		//  - Else, choose the first valid command-line-argument package, if it exists.
   122  		//
   123  		// TODO(rfindley): it might be better to track all IDs here, and exclude
   124  		// them later when type checking, but this is the existing behavior.
   125  		//
   126  		// TODO(ms): is any of this stuff needed for cue?
   127  		for i, pkgImportPath := range pkgImportPaths {
   128  			// If we've seen *anything* prior to command-line arguments package, take
   129  			// it. Note that ids[0] may itself be command-line-arguments.
   130  			if i > 0 && IsCommandLineArguments(pkgImportPath) {
   131  				filesToPkg[file] = pkgImportPaths[:i]
   132  				break
   133  			}
   134  		}
   135  	}
   136  
   137  	return &Graph{
   138  		Packages:       pkgs,
   139  		ImportedBy:     importedBy,
   140  		FilesToPackage: filesToPkg,
   141  	}
   142  }
   143  
   144  // breakImportCycles breaks import cycles in the metadata by deleting
   145  // Deps* edges. It modifies only metadata present in the 'updates'
   146  // subset. This function has an internal test.
   147  func breakImportCycles(metadata, updates map[PackageID]*Package) {
   148  	// 'go list' should never report a cycle without flagging it
   149  	// as such, but we're extra cautious since we're combining
   150  	// information from multiple runs of 'go list'. Also, Bazel
   151  	// may silently report cycles.
   152  	cycles := detectImportCycles(metadata, updates)
   153  	if len(cycles) > 0 {
   154  		// There were cycles (uncommon). Break them.
   155  		//
   156  		// The naive way to break cycles would be to perform a
   157  		// depth-first traversal and to detect and delete
   158  		// cycle-forming edges as we encounter them.
   159  		// However, we're not allowed to modify the existing
   160  		// Metadata records, so we can only break edges out of
   161  		// the 'updates' subset.
   162  		//
   163  		// Another possibility would be to delete not the
   164  		// cycle forming edge but the topmost edge on the
   165  		// stack whose tail is an updated node.
   166  		// However, this would require that we retroactively
   167  		// undo all the effects of the traversals that
   168  		// occurred since that edge was pushed on the stack.
   169  		//
   170  		// We use a simpler scheme: we compute the set of cycles.
   171  		// All cyclic paths necessarily involve at least one
   172  		// updated node, so it is sufficient to break all
   173  		// edges from each updated node to other members of
   174  		// the strong component.
   175  		//
   176  		// This may result in the deletion of dominating
   177  		// edges, causing some dependencies to appear
   178  		// spuriously unreachable. Consider A <-> B -> C
   179  		// where updates={A,B}. The cycle is {A,B} so the
   180  		// algorithm will break both A->B and B->A, causing
   181  		// A to no longer depend on B or C.
   182  		//
   183  		// But that's ok: any error in Metadata.Errors is
   184  		// conservatively assumed by snapshot.clone to be a
   185  		// potential import cycle error, and causes special
   186  		// invalidation so that if B later drops its
   187  		// cycle-forming import of A, both A and B will be
   188  		// invalidated.
   189  		for _, cycle := range cycles {
   190  			cyclic := make(map[PackageID]bool)
   191  			for _, mp := range cycle {
   192  				cyclic[mp.ID] = true
   193  			}
   194  			for id := range cyclic {
   195  				if mp := updates[id]; mp != nil {
   196  					for path, depID := range mp.DepsByImpPath {
   197  						if cyclic[depID] {
   198  							delete(mp.DepsByImpPath, path)
   199  						}
   200  					}
   201  					for path, depID := range mp.DepsByPkgPath {
   202  						if cyclic[depID] {
   203  							delete(mp.DepsByPkgPath, path)
   204  						}
   205  					}
   206  
   207  					// Set m.Errors to enable special
   208  					// invalidation logic in snapshot.clone.
   209  					if len(mp.Errors) == 0 {
   210  						mp.Errors = []packages.Error{{
   211  							Msg:  "detected import cycle",
   212  							Kind: packages.ListError,
   213  						}}
   214  					}
   215  				}
   216  			}
   217  		}
   218  
   219  		// double-check when debugging
   220  		if false {
   221  			if cycles := detectImportCycles(metadata, updates); len(cycles) > 0 {
   222  				bug.Reportf("unbroken cycle: %v", cycles)
   223  			}
   224  		}
   225  	}
   226  }
   227  
   228  // cyclic returns a description of a cycle,
   229  // if the graph is cyclic, otherwise "".
   230  func cyclic(graph map[ImportPath]*build.Instance) string {
   231  	const (
   232  		unvisited = 0
   233  		visited   = 1
   234  		onstack   = 2
   235  	)
   236  	color := make(map[ImportPath]int)
   237  	var visit func(inst *build.Instance) string
   238  	visit = func(inst *build.Instance) string {
   239  		path := ImportPath(inst.ImportPath)
   240  		switch color[path] {
   241  		case unvisited:
   242  			color[path] = onstack
   243  		case onstack:
   244  			return string(path) // cycle!
   245  		case visited:
   246  			return ""
   247  		}
   248  		for _, importedInst := range inst.Imports {
   249  			if cycle := visit(importedInst); cycle != "" {
   250  				return string(path) + "->" + cycle
   251  			}
   252  		}
   253  		color[path] = visited
   254  		return ""
   255  	}
   256  	for _, inst := range graph {
   257  		if cycle := visit(inst); cycle != "" {
   258  			return cycle
   259  		}
   260  	}
   261  	return ""
   262  }
   263  
// detectImportCycles reports cycles in the metadata graph. It returns a new
// unordered array of all cycles (nontrivial strong components) in the
// metadata graph reachable from a non-nil 'updates' value.
//
// Edges are taken from each package's DepsByPkgPath map. The keys of the
// non-nil entries of updates serve as traversal roots, while the package
// data for every node, roots included, is looked up in metadata.
// NOTE(review): this presumably expects metadata to already contain the
// updated packages; confirm against the caller.
func detectImportCycles(metadata, updates map[PackageID]*Package) [][]*Package {
	// We use the depth-first algorithm of Tarjan.
	// https://doi.org/10.1137/0201010
	//
	// TODO(adonovan): when we can use generics, consider factoring
	// in common with the other implementation of Tarjan (in typerefs),
	// abstracting over the node and edge representation.

	// A node wraps a Metadata with its working state.
	// (Unfortunately we can't intrude on shared Metadata.)
	type node struct {
		// rep is the node's parent in the disjoint-set forest;
		// a node whose rep is itself is canonical.
		rep *node
		// mp is the package this node stands for.
		mp *Package
		// index is the visit order, starting at 1 (0 means unvisited);
		// lowlink is the smallest index found so far for this node.
		index, lowlink int32
		// scc is 0 while unvisited, -1 once pushed on the stack,
		// and 1 for the root of a completed SCC.
		scc int8 // TODO(adonovan): opt: cram these 1.5 bits into previous word
	}
	nodes := make(map[PackageID]*node, len(metadata))
	// nodeOf returns the node for id, creating it on first use.
	nodeOf := func(id PackageID) *node {
		n, ok := nodes[id]
		if !ok {
			mp := metadata[id]
			if mp == nil {
				// Dangling import edge.
				// Not sure whether a go/packages driver ever
				// emits this, but create a dummy node in case.
				// Obviously it won't be part of any cycle.
				mp = &Package{ID: id}
			}
			n = &node{mp: mp}
			n.rep = n // a new node is its own representative
			nodes[id] = n
		}
		return n
	}

	// find returns the canonical representative of n.
	// (The nodes form a disjoint set forest.)
	var find func(*node) *node
	find = func(n *node) *node {
		rep := n.rep
		if rep != n {
			rep = find(rep)
			n.rep = rep // simple path compression (no union-by-rank)
		}
		return rep
	}

	// global state
	var (
		index int32 = 1    // next visit index to hand out; 0 is reserved for "unvisited"
		stack []*node      // nodes visited but not yet assigned to a completed SCC
		sccs  [][]*Package // set of nontrivial strongly connected components
	)

	// visit implements the depth-first search of Tarjan's SCC algorithm
	// Precondition: x is canonical.
	var visit func(*node)
	visit = func(x *node) {
		x.index = index
		x.lowlink = index
		index++

		stack = append(stack, x) // push
		x.scc = -1               // mark as on the stack

		for _, yid := range x.mp.DepsByPkgPath {
			y := nodeOf(yid)
			// Loop invariant: x is canonical.
			y = find(y)
			if x == y {
				continue // nodes already combined (self-edges are impossible)
			}

			switch {
			case y.scc > 0:
				// y is already a collapsed SCC

			case y.scc < 0:
				// y is on the stack, and thus in the current SCC.
				if y.index < x.lowlink {
					x.lowlink = y.index
				}

			default:
				// y is unvisited; visit it now.
				visit(y)
				// Note: x and y are now non-canonical.
				x = find(x)
				if y.lowlink < x.lowlink {
					x.lowlink = y.lowlink
				}
			}
		}

		// Is x the root of an SCC?
		if x.lowlink == x.index {
			// Gather all metadata in the SCC (if nontrivial).
			// scc stays nil when x is the only member, so trivial
			// components are never recorded.
			var scc []*Package
			for {
				// Pop y from stack.
				i := len(stack) - 1
				y := stack[i]
				stack = stack[:i]
				if x != y || scc != nil {
					scc = append(scc, y.mp)
				}
				if x == y {
					break // complete
				}
				// x becomes y's canonical representative.
				y.rep = x
			}
			if scc != nil {
				sccs = append(sccs, scc)
			}
			x.scc = 1 // mark the root as a completed SCC
		}
	}

	// Visit only the updated nodes:
	// the existing metadata graph has no cycles,
	// so any new cycle must involve an updated node.
	for id, mp := range updates {
		if mp != nil {
			if n := nodeOf(id); n.index == 0 { // unvisited
				visit(n)
			}
		}
	}

	return sccs
}