golang.org/x/tools/gopls@v0.15.3/internal/cache/metadata/graph.go (about) 1 // Copyright 2022 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package metadata 6 7 import ( 8 "sort" 9 10 "golang.org/x/tools/go/packages" 11 "golang.org/x/tools/gopls/internal/protocol" 12 "golang.org/x/tools/gopls/internal/util/bug" 13 ) 14 15 // A Graph is an immutable and transitively closed graph of [Package] data. 16 type Graph struct { 17 // Packages maps package IDs to their associated Packages. 18 Packages map[PackageID]*Package 19 20 // ImportedBy maps package IDs to the list of packages that import them. 21 ImportedBy map[PackageID][]PackageID 22 23 // IDs maps file URIs to package IDs, sorted by (!valid, cli, packageID). 24 // A single file may belong to multiple packages due to tests packages. 25 // 26 // Invariant: all IDs present in the IDs map exist in the metadata map. 27 IDs map[protocol.DocumentURI][]PackageID 28 } 29 30 // Update creates a new Graph containing the result of applying the given 31 // updates to the receiver, though the receiver is not itself mutated. As a 32 // special case, if updates is empty, Update just returns the receiver. 33 // 34 // A nil map value is used to indicate a deletion. 35 func (g *Graph) Update(updates map[PackageID]*Package) *Graph { 36 if len(updates) == 0 { 37 // Optimization: since the graph is immutable, we can return the receiver. 38 return g 39 } 40 41 // Debugging golang/go#64227, golang/vscode-go#3126: 42 // Assert that the existing metadata graph is acyclic. 43 if cycle := cyclic(g.Packages); cycle != "" { 44 bug.Reportf("metadata is cyclic even before updates: %s", cycle) 45 } 46 // Assert that the updates contain no self-cycles. 47 for id, mp := range updates { 48 if mp != nil { 49 for _, depID := range mp.DepsByPkgPath { 50 if depID == id { 51 bug.Reportf("self-cycle in metadata update: %s", id) 52 } 53 } 54 } 55 } 56 57 // Copy pkgs map then apply updates. 58 pkgs := make(map[PackageID]*Package, len(g.Packages)) 59 for id, mp := range g.Packages { 60 pkgs[id] = mp 61 } 62 for id, mp := range updates { 63 if mp == nil { 64 delete(pkgs, id) 65 } else { 66 pkgs[id] = mp 67 } 68 } 69 70 // Break import cycles involving updated nodes. 71 breakImportCycles(pkgs, updates) 72 73 return newGraph(pkgs) 74 } 75 76 // newGraph returns a new metadataGraph, 77 // deriving relations from the specified metadata. 78 func newGraph(pkgs map[PackageID]*Package) *Graph { 79 // Build the import graph. 80 importedBy := make(map[PackageID][]PackageID) 81 for id, mp := range pkgs { 82 for _, depID := range mp.DepsByPkgPath { 83 importedBy[depID] = append(importedBy[depID], id) 84 } 85 } 86 87 // Collect file associations. 88 uriIDs := make(map[protocol.DocumentURI][]PackageID) 89 for id, mp := range pkgs { 90 uris := map[protocol.DocumentURI]struct{}{} 91 for _, uri := range mp.CompiledGoFiles { 92 uris[uri] = struct{}{} 93 } 94 for _, uri := range mp.GoFiles { 95 uris[uri] = struct{}{} 96 } 97 for uri := range uris { 98 uriIDs[uri] = append(uriIDs[uri], id) 99 } 100 } 101 102 // Sort and filter file associations. 103 for uri, ids := range uriIDs { 104 sort.Slice(ids, func(i, j int) bool { 105 cli := IsCommandLineArguments(ids[i]) 106 clj := IsCommandLineArguments(ids[j]) 107 if cli != clj { 108 return clj 109 } 110 111 // 2. packages appear in name order. 112 return ids[i] < ids[j] 113 }) 114 115 // Choose the best IDs for each URI, according to the following rules: 116 // - If there are any valid real packages, choose them. 117 // - Else, choose the first valid command-line-argument package, if it exists. 118 // 119 // TODO(rfindley): it might be better to track all IDs here, and exclude 120 // them later when type checking, but this is the existing behavior. 121 for i, id := range ids { 122 // If we've seen *anything* prior to command-line arguments package, take 123 // it. Note that ids[0] may itself be command-line-arguments. 124 if i > 0 && IsCommandLineArguments(id) { 125 uriIDs[uri] = ids[:i] 126 break 127 } 128 } 129 } 130 131 return &Graph{ 132 Packages: pkgs, 133 ImportedBy: importedBy, 134 IDs: uriIDs, 135 } 136 } 137 138 // ReverseReflexiveTransitiveClosure returns a new mapping containing the 139 // metadata for the specified packages along with any package that 140 // transitively imports one of them, keyed by ID, including all the initial packages. 141 func (g *Graph) ReverseReflexiveTransitiveClosure(ids ...PackageID) map[PackageID]*Package { 142 seen := make(map[PackageID]*Package) 143 var visitAll func([]PackageID) 144 visitAll = func(ids []PackageID) { 145 for _, id := range ids { 146 if seen[id] == nil { 147 if mp := g.Packages[id]; mp != nil { 148 seen[id] = mp 149 visitAll(g.ImportedBy[id]) 150 } 151 } 152 } 153 } 154 visitAll(ids) 155 return seen 156 } 157 158 // breakImportCycles breaks import cycles in the metadata by deleting 159 // Deps* edges. It modifies only metadata present in the 'updates' 160 // subset. This function has an internal test. 161 func breakImportCycles(metadata, updates map[PackageID]*Package) { 162 // 'go list' should never report a cycle without flagging it 163 // as such, but we're extra cautious since we're combining 164 // information from multiple runs of 'go list'. Also, Bazel 165 // may silently report cycles. 166 cycles := detectImportCycles(metadata, updates) 167 if len(cycles) > 0 { 168 // There were cycles (uncommon). Break them. 169 // 170 // The naive way to break cycles would be to perform a 171 // depth-first traversal and to detect and delete 172 // cycle-forming edges as we encounter them. 173 // However, we're not allowed to modify the existing 174 // Metadata records, so we can only break edges out of 175 // the 'updates' subset. 176 // 177 // Another possibility would be to delete not the 178 // cycle forming edge but the topmost edge on the 179 // stack whose tail is an updated node. 180 // However, this would require that we retroactively 181 // undo all the effects of the traversals that 182 // occurred since that edge was pushed on the stack. 183 // 184 // We use a simpler scheme: we compute the set of cycles. 185 // All cyclic paths necessarily involve at least one 186 // updated node, so it is sufficient to break all 187 // edges from each updated node to other members of 188 // the strong component. 189 // 190 // This may result in the deletion of dominating 191 // edges, causing some dependencies to appear 192 // spuriously unreachable. Consider A <-> B -> C 193 // where updates={A,B}. The cycle is {A,B} so the 194 // algorithm will break both A->B and B->A, causing 195 // A to no longer depend on B or C. 196 // 197 // But that's ok: any error in Metadata.Errors is 198 // conservatively assumed by snapshot.clone to be a 199 // potential import cycle error, and causes special 200 // invalidation so that if B later drops its 201 // cycle-forming import of A, both A and B will be 202 // invalidated. 203 for _, cycle := range cycles { 204 cyclic := make(map[PackageID]bool) 205 for _, mp := range cycle { 206 cyclic[mp.ID] = true 207 } 208 for id := range cyclic { 209 if mp := updates[id]; mp != nil { 210 for path, depID := range mp.DepsByImpPath { 211 if cyclic[depID] { 212 delete(mp.DepsByImpPath, path) 213 } 214 } 215 for path, depID := range mp.DepsByPkgPath { 216 if cyclic[depID] { 217 delete(mp.DepsByPkgPath, path) 218 } 219 } 220 221 // Set m.Errors to enable special 222 // invalidation logic in snapshot.clone. 223 if len(mp.Errors) == 0 { 224 mp.Errors = []packages.Error{{ 225 Msg: "detected import cycle", 226 Kind: packages.ListError, 227 }} 228 } 229 } 230 } 231 } 232 233 // double-check when debugging 234 if false { 235 if cycles := detectImportCycles(metadata, updates); len(cycles) > 0 { 236 bug.Reportf("unbroken cycle: %v", cycles) 237 } 238 } 239 } 240 } 241 242 // cyclic returns a description of a cycle, 243 // if the graph is cyclic, otherwise "". 244 func cyclic(graph map[PackageID]*Package) string { 245 const ( 246 unvisited = 0 247 visited = 1 248 onstack = 2 249 ) 250 color := make(map[PackageID]int) 251 var visit func(id PackageID) string 252 visit = func(id PackageID) string { 253 switch color[id] { 254 case unvisited: 255 color[id] = onstack 256 case onstack: 257 return string(id) // cycle! 258 case visited: 259 return "" 260 } 261 if mp := graph[id]; mp != nil { 262 for _, depID := range mp.DepsByPkgPath { 263 if cycle := visit(depID); cycle != "" { 264 return string(id) + "->" + cycle 265 } 266 } 267 } 268 color[id] = visited 269 return "" 270 } 271 for id := range graph { 272 if cycle := visit(id); cycle != "" { 273 return cycle 274 } 275 } 276 return "" 277 } 278 279 // detectImportCycles reports cycles in the metadata graph. It returns a new 280 // unordered array of all cycles (nontrivial strong components) in the 281 // metadata graph reachable from a non-nil 'updates' value. 282 func detectImportCycles(metadata, updates map[PackageID]*Package) [][]*Package { 283 // We use the depth-first algorithm of Tarjan. 284 // https://doi.org/10.1137/0201010 285 // 286 // TODO(adonovan): when we can use generics, consider factoring 287 // in common with the other implementation of Tarjan (in typerefs), 288 // abstracting over the node and edge representation. 289 290 // A node wraps a Metadata with its working state. 291 // (Unfortunately we can't intrude on shared Metadata.) 292 type node struct { 293 rep *node 294 mp *Package 295 index, lowlink int32 296 scc int8 // TODO(adonovan): opt: cram these 1.5 bits into previous word 297 } 298 nodes := make(map[PackageID]*node, len(metadata)) 299 nodeOf := func(id PackageID) *node { 300 n, ok := nodes[id] 301 if !ok { 302 mp := metadata[id] 303 if mp == nil { 304 // Dangling import edge. 305 // Not sure whether a go/packages driver ever 306 // emits this, but create a dummy node in case. 307 // Obviously it won't be part of any cycle. 308 mp = &Package{ID: id} 309 } 310 n = &node{mp: mp} 311 n.rep = n 312 nodes[id] = n 313 } 314 return n 315 } 316 317 // find returns the canonical node decl. 318 // (The nodes form a disjoint set forest.) 319 var find func(*node) *node 320 find = func(n *node) *node { 321 rep := n.rep 322 if rep != n { 323 rep = find(rep) 324 n.rep = rep // simple path compression (no union-by-rank) 325 } 326 return rep 327 } 328 329 // global state 330 var ( 331 index int32 = 1 332 stack []*node 333 sccs [][]*Package // set of nontrivial strongly connected components 334 ) 335 336 // visit implements the depth-first search of Tarjan's SCC algorithm 337 // Precondition: x is canonical. 338 var visit func(*node) 339 visit = func(x *node) { 340 x.index = index 341 x.lowlink = index 342 index++ 343 344 stack = append(stack, x) // push 345 x.scc = -1 346 347 for _, yid := range x.mp.DepsByPkgPath { 348 y := nodeOf(yid) 349 // Loop invariant: x is canonical. 350 y = find(y) 351 if x == y { 352 continue // nodes already combined (self-edges are impossible) 353 } 354 355 switch { 356 case y.scc > 0: 357 // y is already a collapsed SCC 358 359 case y.scc < 0: 360 // y is on the stack, and thus in the current SCC. 361 if y.index < x.lowlink { 362 x.lowlink = y.index 363 } 364 365 default: 366 // y is unvisited; visit it now. 367 visit(y) 368 // Note: x and y are now non-canonical. 369 x = find(x) 370 if y.lowlink < x.lowlink { 371 x.lowlink = y.lowlink 372 } 373 } 374 } 375 376 // Is x the root of an SCC? 377 if x.lowlink == x.index { 378 // Gather all metadata in the SCC (if nontrivial). 379 var scc []*Package 380 for { 381 // Pop y from stack. 382 i := len(stack) - 1 383 y := stack[i] 384 stack = stack[:i] 385 if x != y || scc != nil { 386 scc = append(scc, y.mp) 387 } 388 if x == y { 389 break // complete 390 } 391 // x becomes y's canonical representative. 392 y.rep = x 393 } 394 if scc != nil { 395 sccs = append(sccs, scc) 396 } 397 x.scc = 1 398 } 399 } 400 401 // Visit only the updated nodes: 402 // the existing metadata graph has no cycles, 403 // so any new cycle must involve an updated node. 404 for id, mp := range updates { 405 if mp != nil { 406 if n := nodeOf(id); n.index == 0 { // unvisited 407 visit(n) 408 } 409 } 410 } 411 412 return sccs 413 }