github.com/powerman/golang-tools@v0.1.11-0.20220410185822-5ad214d8d803/cmd/digraph/digraph.go (about) 1 // Copyright 2019 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 /* 6 The digraph command performs queries over unlabelled directed graphs 7 represented in text form. It is intended to integrate nicely with 8 typical UNIX command pipelines. 9 10 Usage: 11 12 your-application | digraph [command] 13 14 The support commands are: 15 16 nodes 17 the set of all nodes 18 degree 19 the in-degree and out-degree of each node 20 transpose 21 the reverse of the input edges 22 preds <node> ... 23 the set of immediate predecessors of the specified nodes 24 succs <node> ... 25 the set of immediate successors of the specified nodes 26 forward <node> ... 27 the set of nodes transitively reachable from the specified nodes 28 reverse <node> ... 29 the set of nodes that transitively reach the specified nodes 30 somepath <node> <node> 31 the list of nodes on some arbitrary path from the first node to the second 32 allpaths <node> <node> 33 the set of nodes on all paths from the first node to the second 34 sccs 35 all strongly connected components (one per line) 36 scc <node> 37 the set of nodes nodes strongly connected to the specified one 38 focus <node> 39 the subgraph containing all directed paths that pass through the specified node 40 41 Input format: 42 43 Each line contains zero or more words. Words are separated by unquoted 44 whitespace; words may contain Go-style double-quoted portions, allowing spaces 45 and other characters to be expressed. 46 47 Each word declares a node, and if there are more than one, an edge from the 48 first to each subsequent one. The graph is provided on the standard input. 49 50 For instance, the following (acyclic) graph specifies a partial order among the 51 subtasks of getting dressed: 52 53 $ cat clothes.txt 54 socks shoes 55 "boxer shorts" pants 56 pants belt shoes 57 shirt tie sweater 58 sweater jacket 59 hat 60 61 The line "shirt tie sweater" indicates the two edges shirt -> tie and 62 shirt -> sweater, not shirt -> tie -> sweater. 63 64 Example usage: 65 66 Using digraph with existing Go tools: 67 68 $ go mod graph | digraph nodes # Operate on the Go module graph. 69 $ go list -m all | digraph nodes # Operate on the Go package graph. 70 71 Show the transitive closure of imports of the digraph tool itself: 72 $ go list -f '{{.ImportPath}} {{join .Imports " "}}' ... | digraph forward github.com/powerman/golang-tools/cmd/digraph 73 74 Show which clothes (see above) must be donned before a jacket: 75 $ digraph reverse jacket 76 77 */ 78 package main // import "github.com/powerman/golang-tools/cmd/digraph" 79 80 // TODO(adonovan): 81 // - support input files other than stdin 82 // - support alternative formats (AT&T GraphViz, CSV, etc), 83 // a comment syntax, etc. 84 // - allow queries to nest, like Blaze query language. 85 86 import ( 87 "bufio" 88 "bytes" 89 "errors" 90 "flag" 91 "fmt" 92 "io" 93 "os" 94 "sort" 95 "strconv" 96 "strings" 97 "unicode" 98 "unicode/utf8" 99 ) 100 101 func usage() { 102 fmt.Fprintf(os.Stderr, `Usage: your-application | digraph [command] 103 104 The support commands are: 105 nodes 106 the set of all nodes 107 degree 108 the in-degree and out-degree of each node 109 transpose 110 the reverse of the input edges 111 preds <node> ... 112 the set of immediate predecessors of the specified nodes 113 succs <node> ... 114 the set of immediate successors of the specified nodes 115 forward <node> ... 116 the set of nodes transitively reachable from the specified nodes 117 reverse <node> ... 118 the set of nodes that transitively reach the specified nodes 119 somepath <node> <node> 120 the list of nodes on some arbitrary path from the first node to the second 121 allpaths <node> <node> 122 the set of nodes on all paths from the first node to the second 123 sccs 124 all strongly connected components (one per line) 125 scc <node> 126 the set of nodes nodes strongly connected to the specified one 127 focus <node> 128 the subgraph containing all directed paths that pass through the specified node 129 `) 130 os.Exit(2) 131 } 132 133 func main() { 134 flag.Usage = usage 135 flag.Parse() 136 137 args := flag.Args() 138 if len(args) == 0 { 139 usage() 140 } 141 142 if err := digraph(args[0], args[1:]); err != nil { 143 fmt.Fprintf(os.Stderr, "digraph: %s\n", err) 144 os.Exit(1) 145 } 146 } 147 148 type nodelist []string 149 150 func (l nodelist) println(sep string) { 151 for i, node := range l { 152 if i > 0 { 153 fmt.Fprint(stdout, sep) 154 } 155 fmt.Fprint(stdout, node) 156 } 157 fmt.Fprintln(stdout) 158 } 159 160 type nodeset map[string]bool // TODO(deklerk): change bool to struct to reduce memory footprint 161 162 func (s nodeset) sort() nodelist { 163 nodes := make(nodelist, len(s)) 164 var i int 165 for node := range s { 166 nodes[i] = node 167 i++ 168 } 169 sort.Strings(nodes) 170 return nodes 171 } 172 173 func (s nodeset) addAll(x nodeset) { 174 for node := range x { 175 s[node] = true 176 } 177 } 178 179 // A graph maps nodes to the non-nil set of their immediate successors. 180 type graph map[string]nodeset 181 182 func (g graph) addNode(node string) nodeset { 183 edges := g[node] 184 if edges == nil { 185 edges = make(nodeset) 186 g[node] = edges 187 } 188 return edges 189 } 190 191 func (g graph) addEdges(from string, to ...string) { 192 edges := g.addNode(from) 193 for _, to := range to { 194 g.addNode(to) 195 edges[to] = true 196 } 197 } 198 199 func (g graph) reachableFrom(roots nodeset) nodeset { 200 seen := make(nodeset) 201 var visit func(node string) 202 visit = func(node string) { 203 if !seen[node] { 204 seen[node] = true 205 for e := range g[node] { 206 visit(e) 207 } 208 } 209 } 210 for root := range roots { 211 visit(root) 212 } 213 return seen 214 } 215 216 func (g graph) transpose() graph { 217 rev := make(graph) 218 for node, edges := range g { 219 rev.addNode(node) 220 for succ := range edges { 221 rev.addEdges(succ, node) 222 } 223 } 224 return rev 225 } 226 227 func (g graph) sccs() []nodeset { 228 // Kosaraju's algorithm---Tarjan is overkill here. 229 230 // Forward pass. 231 S := make(nodelist, 0, len(g)) // postorder stack 232 seen := make(nodeset) 233 var visit func(node string) 234 visit = func(node string) { 235 if !seen[node] { 236 seen[node] = true 237 for e := range g[node] { 238 visit(e) 239 } 240 S = append(S, node) 241 } 242 } 243 for node := range g { 244 visit(node) 245 } 246 247 // Reverse pass. 248 rev := g.transpose() 249 var scc nodeset 250 seen = make(nodeset) 251 var rvisit func(node string) 252 rvisit = func(node string) { 253 if !seen[node] { 254 seen[node] = true 255 scc[node] = true 256 for e := range rev[node] { 257 rvisit(e) 258 } 259 } 260 } 261 var sccs []nodeset 262 for len(S) > 0 { 263 top := S[len(S)-1] 264 S = S[:len(S)-1] // pop 265 if !seen[top] { 266 scc = make(nodeset) 267 rvisit(top) 268 sccs = append(sccs, scc) 269 } 270 } 271 return sccs 272 } 273 274 func (g graph) allpaths(from, to string) error { 275 // Mark all nodes to "to". 276 seen := make(nodeset) // value of seen[x] indicates whether x is on some path to "to" 277 var visit func(node string) bool 278 visit = func(node string) bool { 279 reachesTo, ok := seen[node] 280 if !ok { 281 reachesTo = node == to 282 seen[node] = reachesTo 283 for e := range g[node] { 284 if visit(e) { 285 reachesTo = true 286 } 287 } 288 if reachesTo && node != to { 289 seen[node] = true 290 } 291 } 292 return reachesTo 293 } 294 visit(from) 295 296 // For each marked node, collect its marked successors. 297 var edges []string 298 for n := range seen { 299 for succ := range g[n] { 300 if seen[succ] { 301 edges = append(edges, n+" "+succ) 302 } 303 } 304 } 305 306 // Sort (so that this method is deterministic) and print edges. 307 sort.Strings(edges) 308 for _, e := range edges { 309 fmt.Fprintln(stdout, e) 310 } 311 312 return nil 313 } 314 315 func (g graph) somepath(from, to string) error { 316 type edge struct{ from, to string } 317 seen := make(nodeset) 318 var dfs func(path []edge, from string) bool 319 dfs = func(path []edge, from string) bool { 320 if !seen[from] { 321 seen[from] = true 322 if from == to { 323 // fmt.Println(path, len(path), cap(path)) 324 // Print and unwind. 325 for _, e := range path { 326 fmt.Fprintln(stdout, e.from+" "+e.to) 327 } 328 return true 329 } 330 for e := range g[from] { 331 if dfs(append(path, edge{from: from, to: e}), e) { 332 return true 333 } 334 } 335 } 336 return false 337 } 338 maxEdgesInGraph := len(g) * (len(g) - 1) 339 if !dfs(make([]edge, 0, maxEdgesInGraph), from) { 340 return fmt.Errorf("no path from %q to %q", from, to) 341 } 342 return nil 343 } 344 345 func parse(rd io.Reader) (graph, error) { 346 g := make(graph) 347 348 var linenum int 349 in := bufio.NewScanner(rd) 350 for in.Scan() { 351 linenum++ 352 // Split into words, honoring double-quotes per Go spec. 353 words, err := split(in.Text()) 354 if err != nil { 355 return nil, fmt.Errorf("at line %d: %v", linenum, err) 356 } 357 if len(words) > 0 { 358 g.addEdges(words[0], words[1:]...) 359 } 360 } 361 if err := in.Err(); err != nil { 362 return nil, err 363 } 364 return g, nil 365 } 366 367 // Overridable for testing purposes. 368 var stdin io.Reader = os.Stdin 369 var stdout io.Writer = os.Stdout 370 371 func digraph(cmd string, args []string) error { 372 // Parse the input graph. 373 g, err := parse(stdin) 374 if err != nil { 375 return err 376 } 377 378 // Parse the command line. 379 switch cmd { 380 case "nodes": 381 if len(args) != 0 { 382 return fmt.Errorf("usage: digraph nodes") 383 } 384 nodes := make(nodeset) 385 for node := range g { 386 nodes[node] = true 387 } 388 nodes.sort().println("\n") 389 390 case "degree": 391 if len(args) != 0 { 392 return fmt.Errorf("usage: digraph degree") 393 } 394 nodes := make(nodeset) 395 for node := range g { 396 nodes[node] = true 397 } 398 rev := g.transpose() 399 for _, node := range nodes.sort() { 400 fmt.Fprintf(stdout, "%d\t%d\t%s\n", len(rev[node]), len(g[node]), node) 401 } 402 403 case "transpose": 404 if len(args) != 0 { 405 return fmt.Errorf("usage: digraph transpose") 406 } 407 var revEdges []string 408 for node, succs := range g.transpose() { 409 for succ := range succs { 410 revEdges = append(revEdges, fmt.Sprintf("%s %s", node, succ)) 411 } 412 } 413 sort.Strings(revEdges) // make output deterministic 414 for _, e := range revEdges { 415 fmt.Fprintln(stdout, e) 416 } 417 418 case "succs", "preds": 419 if len(args) == 0 { 420 return fmt.Errorf("usage: digraph %s <node> ... ", cmd) 421 } 422 g := g 423 if cmd == "preds" { 424 g = g.transpose() 425 } 426 result := make(nodeset) 427 for _, root := range args { 428 edges := g[root] 429 if edges == nil { 430 return fmt.Errorf("no such node %q", root) 431 } 432 result.addAll(edges) 433 } 434 result.sort().println("\n") 435 436 case "forward", "reverse": 437 if len(args) == 0 { 438 return fmt.Errorf("usage: digraph %s <node> ... ", cmd) 439 } 440 roots := make(nodeset) 441 for _, root := range args { 442 if g[root] == nil { 443 return fmt.Errorf("no such node %q", root) 444 } 445 roots[root] = true 446 } 447 g := g 448 if cmd == "reverse" { 449 g = g.transpose() 450 } 451 g.reachableFrom(roots).sort().println("\n") 452 453 case "somepath": 454 if len(args) != 2 { 455 return fmt.Errorf("usage: digraph somepath <from> <to>") 456 } 457 from, to := args[0], args[1] 458 if g[from] == nil { 459 return fmt.Errorf("no such 'from' node %q", from) 460 } 461 if g[to] == nil { 462 return fmt.Errorf("no such 'to' node %q", to) 463 } 464 if err := g.somepath(from, to); err != nil { 465 return err 466 } 467 468 case "allpaths": 469 if len(args) != 2 { 470 return fmt.Errorf("usage: digraph allpaths <from> <to>") 471 } 472 from, to := args[0], args[1] 473 if g[from] == nil { 474 return fmt.Errorf("no such 'from' node %q", from) 475 } 476 if g[to] == nil { 477 return fmt.Errorf("no such 'to' node %q", to) 478 } 479 if err := g.allpaths(from, to); err != nil { 480 return err 481 } 482 483 case "sccs": 484 if len(args) != 0 { 485 return fmt.Errorf("usage: digraph sccs") 486 } 487 for _, scc := range g.sccs() { 488 scc.sort().println(" ") 489 } 490 491 case "scc": 492 if len(args) != 1 { 493 return fmt.Errorf("usage: digraph scc <node>") 494 } 495 node := args[0] 496 if g[node] == nil { 497 return fmt.Errorf("no such node %q", node) 498 } 499 for _, scc := range g.sccs() { 500 if scc[node] { 501 scc.sort().println("\n") 502 break 503 } 504 } 505 506 case "focus": 507 if len(args) != 1 { 508 return fmt.Errorf("usage: digraph focus <node>") 509 } 510 node := args[0] 511 if g[node] == nil { 512 return fmt.Errorf("no such node %q", node) 513 } 514 515 edges := make(map[string]struct{}) 516 for from := range g.reachableFrom(nodeset{node: true}) { 517 for to := range g[from] { 518 edges[fmt.Sprintf("%s %s", from, to)] = struct{}{} 519 } 520 } 521 522 gtrans := g.transpose() 523 for from := range gtrans.reachableFrom(nodeset{node: true}) { 524 for to := range gtrans[from] { 525 edges[fmt.Sprintf("%s %s", to, from)] = struct{}{} 526 } 527 } 528 529 edgesSorted := make([]string, 0, len(edges)) 530 for e := range edges { 531 edgesSorted = append(edgesSorted, e) 532 } 533 sort.Strings(edgesSorted) 534 fmt.Fprintln(stdout, strings.Join(edgesSorted, "\n")) 535 536 default: 537 return fmt.Errorf("no such command %q", cmd) 538 } 539 540 return nil 541 } 542 543 // -- Utilities -------------------------------------------------------- 544 545 // split splits a line into words, which are generally separated by 546 // spaces, but Go-style double-quoted string literals are also supported. 547 // (This approximates the behaviour of the Bourne shell.) 548 // 549 // `one "two three"` -> ["one" "two three"] 550 // `a"\n"b` -> ["a\nb"] 551 // 552 func split(line string) ([]string, error) { 553 var ( 554 words []string 555 inWord bool 556 current bytes.Buffer 557 ) 558 559 for len(line) > 0 { 560 r, size := utf8.DecodeRuneInString(line) 561 if unicode.IsSpace(r) { 562 if inWord { 563 words = append(words, current.String()) 564 current.Reset() 565 inWord = false 566 } 567 } else if r == '"' { 568 var ok bool 569 size, ok = quotedLength(line) 570 if !ok { 571 return nil, errors.New("invalid quotation") 572 } 573 s, err := strconv.Unquote(line[:size]) 574 if err != nil { 575 return nil, err 576 } 577 current.WriteString(s) 578 inWord = true 579 } else { 580 current.WriteRune(r) 581 inWord = true 582 } 583 line = line[size:] 584 } 585 if inWord { 586 words = append(words, current.String()) 587 } 588 return words, nil 589 } 590 591 // quotedLength returns the length in bytes of the prefix of input that 592 // contain a possibly-valid double-quoted Go string literal. 593 // 594 // On success, n is at least two (""); input[:n] may be passed to 595 // strconv.Unquote to interpret its value, and input[n:] contains the 596 // rest of the input. 597 // 598 // On failure, quotedLength returns false, and the entire input can be 599 // passed to strconv.Unquote if an informative error message is desired. 600 // 601 // quotedLength does not and need not detect all errors, such as 602 // invalid hex or octal escape sequences, since it assumes 603 // strconv.Unquote will be applied to the prefix. It guarantees only 604 // that if there is a prefix of input containing a valid string literal, 605 // its length is returned. 606 // 607 // TODO(adonovan): move this into a strconv-like utility package. 608 // 609 func quotedLength(input string) (n int, ok bool) { 610 var offset int 611 612 // next returns the rune at offset, or -1 on EOF. 613 // offset advances to just after that rune. 614 next := func() rune { 615 if offset < len(input) { 616 r, size := utf8.DecodeRuneInString(input[offset:]) 617 offset += size 618 return r 619 } 620 return -1 621 } 622 623 if next() != '"' { 624 return // error: not a quotation 625 } 626 627 for { 628 r := next() 629 if r == '\n' || r < 0 { 630 return // error: string literal not terminated 631 } 632 if r == '"' { 633 return offset, true // success 634 } 635 if r == '\\' { 636 var skip int 637 switch next() { 638 case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"': 639 skip = 0 640 case '0', '1', '2', '3', '4', '5', '6', '7': 641 skip = 2 642 case 'x': 643 skip = 2 644 case 'u': 645 skip = 4 646 case 'U': 647 skip = 8 648 default: 649 return // error: invalid escape 650 } 651 652 for i := 0; i < skip; i++ { 653 next() 654 } 655 } 656 } 657 }