github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/cmd/godoc/index.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // This file contains the infrastructure to create an 6 // identifier and full-text index for a set of Go files. 7 // 8 // Algorithm for identifier index: 9 // - traverse all .go files of the file tree specified by root 10 // - for each identifier (word) encountered, collect all occurrences (spots) 11 // into a list; this produces a list of spots for each word 12 // - reduce the lists: from a list of spots to a list of FileRuns, 13 // and from a list of FileRuns into a list of PakRuns 14 // - make a HitList from the PakRuns 15 // 16 // Details: 17 // - keep two lists per word: one containing package-level declarations 18 // that have snippets, and one containing all other spots 19 // - keep the snippets in a separate table indexed by snippet index 20 // and store the snippet index in place of the line number in a SpotInfo 21 // (the line number for spots with snippets is stored in the snippet) 22 // - at the end, create lists of alternative spellings for a given 23 // word 24 // 25 // Algorithm for full text index: 26 // - concatenate all source code in a byte buffer (in memory) 27 // - add the files to a file set in lockstep as they are added to the byte 28 // buffer such that a byte buffer offset corresponds to the Pos value for 29 // that file location 30 // - create a suffix array from the concatenated sources 31 // 32 // String lookup in full text index: 33 // - use the suffix array to lookup a string's offsets - the offsets 34 // correspond to the Pos values relative to the file set 35 // - translate the Pos values back into file and line information and 36 // sort the result 37 38 package main 39 40 import ( 41 "bufio" 42 "bytes" 43 "encoding/gob" 44 "errors" 45 "go/ast" 46 "go/parser" 47 "go/token" 48 "index/suffixarray" 49 "io" 50 "os" 51 pathpkg "path" 52 "regexp" 53 "sort" 54 "strings" 55 "time" 56 "unicode" 57 ) 58 59 // ---------------------------------------------------------------------------- 60 // InterfaceSlice is a helper type for sorting interface 61 // slices according to some slice-specific sort criteria. 62 63 type Comparer func(x, y interface{}) bool 64 65 type InterfaceSlice struct { 66 slice []interface{} 67 less Comparer 68 } 69 70 func (p *InterfaceSlice) Len() int { return len(p.slice) } 71 func (p *InterfaceSlice) Less(i, j int) bool { return p.less(p.slice[i], p.slice[j]) } 72 func (p *InterfaceSlice) Swap(i, j int) { p.slice[i], p.slice[j] = p.slice[j], p.slice[i] } 73 74 // ---------------------------------------------------------------------------- 75 // RunList 76 77 // A RunList is a list of entries that can be sorted according to some 78 // criteria. A RunList may be compressed by grouping "runs" of entries 79 // which are equal (according to the sort critera) into a new RunList of 80 // runs. For instance, a RunList containing pairs (x, y) may be compressed 81 // into a RunList containing pair runs (x, {y}) where each run consists of 82 // a list of y's with the same x. 83 type RunList []interface{} 84 85 func (h RunList) sort(less Comparer) { 86 sort.Sort(&InterfaceSlice{h, less}) 87 } 88 89 // Compress entries which are the same according to a sort criteria 90 // (specified by less) into "runs". 91 func (h RunList) reduce(less Comparer, newRun func(h RunList) interface{}) RunList { 92 if len(h) == 0 { 93 return nil 94 } 95 // len(h) > 0 96 97 // create runs of entries with equal values 98 h.sort(less) 99 100 // for each run, make a new run object and collect them in a new RunList 101 var hh RunList 102 i, x := 0, h[0] 103 for j, y := range h { 104 if less(x, y) { 105 hh = append(hh, newRun(h[i:j])) 106 i, x = j, h[j] // start a new run 107 } 108 } 109 // add final run, if any 110 if i < len(h) { 111 hh = append(hh, newRun(h[i:])) 112 } 113 114 return hh 115 } 116 117 // ---------------------------------------------------------------------------- 118 // SpotInfo 119 120 // A SpotInfo value describes a particular identifier spot in a given file; 121 // It encodes three values: the SpotKind (declaration or use), a line or 122 // snippet index "lori", and whether it's a line or index. 123 // 124 // The following encoding is used: 125 // 126 // bits 32 4 1 0 127 // value [lori|kind|isIndex] 128 // 129 type SpotInfo uint32 130 131 // SpotKind describes whether an identifier is declared (and what kind of 132 // declaration) or used. 133 type SpotKind uint32 134 135 const ( 136 PackageClause SpotKind = iota 137 ImportDecl 138 ConstDecl 139 TypeDecl 140 VarDecl 141 FuncDecl 142 MethodDecl 143 Use 144 nKinds 145 ) 146 147 func init() { 148 // sanity check: if nKinds is too large, the SpotInfo 149 // accessor functions may need to be updated 150 if nKinds > 8 { 151 panic("internal error: nKinds > 8") 152 } 153 } 154 155 // makeSpotInfo makes a SpotInfo. 156 func makeSpotInfo(kind SpotKind, lori int, isIndex bool) SpotInfo { 157 // encode lori: bits [4..32) 158 x := SpotInfo(lori) << 4 159 if int(x>>4) != lori { 160 // lori value doesn't fit - since snippet indices are 161 // most certainly always smaller then 1<<28, this can 162 // only happen for line numbers; give it no line number (= 0) 163 x = 0 164 } 165 // encode kind: bits [1..4) 166 x |= SpotInfo(kind) << 1 167 // encode isIndex: bit 0 168 if isIndex { 169 x |= 1 170 } 171 return x 172 } 173 174 func (x SpotInfo) Kind() SpotKind { return SpotKind(x >> 1 & 7) } 175 func (x SpotInfo) Lori() int { return int(x >> 4) } 176 func (x SpotInfo) IsIndex() bool { return x&1 != 0 } 177 178 // ---------------------------------------------------------------------------- 179 // KindRun 180 181 // Debugging support. Disable to see multiple entries per line. 182 const removeDuplicates = true 183 184 // A KindRun is a run of SpotInfos of the same kind in a given file. 185 // The kind (3 bits) is stored in each SpotInfo element; to find the 186 // kind of a KindRun, look at any of it's elements. 187 type KindRun []SpotInfo 188 189 // KindRuns are sorted by line number or index. Since the isIndex bit 190 // is always the same for all infos in one list we can compare lori's. 191 func (k KindRun) Len() int { return len(k) } 192 func (k KindRun) Less(i, j int) bool { return k[i].Lori() < k[j].Lori() } 193 func (k KindRun) Swap(i, j int) { k[i], k[j] = k[j], k[i] } 194 195 // FileRun contents are sorted by Kind for the reduction into KindRuns. 196 func lessKind(x, y interface{}) bool { return x.(SpotInfo).Kind() < y.(SpotInfo).Kind() } 197 198 // newKindRun allocates a new KindRun from the SpotInfo run h. 199 func newKindRun(h RunList) interface{} { 200 run := make(KindRun, len(h)) 201 for i, x := range h { 202 run[i] = x.(SpotInfo) 203 } 204 205 // Spots were sorted by file and kind to create this run. 206 // Within this run, sort them by line number or index. 207 sort.Sort(run) 208 209 if removeDuplicates { 210 // Since both the lori and kind field must be 211 // same for duplicates, and since the isIndex 212 // bit is always the same for all infos in one 213 // list we can simply compare the entire info. 214 k := 0 215 prev := SpotInfo(1<<32 - 1) // an unlikely value 216 for _, x := range run { 217 if x != prev { 218 run[k] = x 219 k++ 220 prev = x 221 } 222 } 223 run = run[0:k] 224 } 225 226 return run 227 } 228 229 // ---------------------------------------------------------------------------- 230 // FileRun 231 232 // A Pak describes a Go package. 233 type Pak struct { 234 Path string // path of directory containing the package 235 Name string // package name as declared by package clause 236 } 237 238 // Paks are sorted by name (primary key) and by import path (secondary key). 239 func (p *Pak) less(q *Pak) bool { 240 return p.Name < q.Name || p.Name == q.Name && p.Path < q.Path 241 } 242 243 // A File describes a Go file. 244 type File struct { 245 Name string // directory-local file name 246 Pak *Pak // the package to which the file belongs 247 } 248 249 // Path returns the file path of f. 250 func (f *File) Path() string { 251 return pathpkg.Join(f.Pak.Path, f.Name) 252 } 253 254 // A Spot describes a single occurrence of a word. 255 type Spot struct { 256 File *File 257 Info SpotInfo 258 } 259 260 // A FileRun is a list of KindRuns belonging to the same file. 261 type FileRun struct { 262 File *File 263 Groups []KindRun 264 } 265 266 // Spots are sorted by file path for the reduction into FileRuns. 267 func lessSpot(x, y interface{}) bool { 268 fx := x.(Spot).File 269 fy := y.(Spot).File 270 // same as "return fx.Path() < fy.Path()" but w/o computing the file path first 271 px := fx.Pak.Path 272 py := fy.Pak.Path 273 return px < py || px == py && fx.Name < fy.Name 274 } 275 276 // newFileRun allocates a new FileRun from the Spot run h. 277 func newFileRun(h RunList) interface{} { 278 file := h[0].(Spot).File 279 280 // reduce the list of Spots into a list of KindRuns 281 h1 := make(RunList, len(h)) 282 for i, x := range h { 283 h1[i] = x.(Spot).Info 284 } 285 h2 := h1.reduce(lessKind, newKindRun) 286 287 // create the FileRun 288 groups := make([]KindRun, len(h2)) 289 for i, x := range h2 { 290 groups[i] = x.(KindRun) 291 } 292 return &FileRun{file, groups} 293 } 294 295 // ---------------------------------------------------------------------------- 296 // PakRun 297 298 // A PakRun describes a run of *FileRuns of a package. 299 type PakRun struct { 300 Pak *Pak 301 Files []*FileRun 302 } 303 304 // Sorting support for files within a PakRun. 305 func (p *PakRun) Len() int { return len(p.Files) } 306 func (p *PakRun) Less(i, j int) bool { return p.Files[i].File.Name < p.Files[j].File.Name } 307 func (p *PakRun) Swap(i, j int) { p.Files[i], p.Files[j] = p.Files[j], p.Files[i] } 308 309 // FileRuns are sorted by package for the reduction into PakRuns. 310 func lessFileRun(x, y interface{}) bool { 311 return x.(*FileRun).File.Pak.less(y.(*FileRun).File.Pak) 312 } 313 314 // newPakRun allocates a new PakRun from the *FileRun run h. 315 func newPakRun(h RunList) interface{} { 316 pak := h[0].(*FileRun).File.Pak 317 files := make([]*FileRun, len(h)) 318 for i, x := range h { 319 files[i] = x.(*FileRun) 320 } 321 run := &PakRun{pak, files} 322 sort.Sort(run) // files were sorted by package; sort them by file now 323 return run 324 } 325 326 // ---------------------------------------------------------------------------- 327 // HitList 328 329 // A HitList describes a list of PakRuns. 330 type HitList []*PakRun 331 332 // PakRuns are sorted by package. 333 func lessPakRun(x, y interface{}) bool { return x.(*PakRun).Pak.less(y.(*PakRun).Pak) } 334 335 func reduce(h0 RunList) HitList { 336 // reduce a list of Spots into a list of FileRuns 337 h1 := h0.reduce(lessSpot, newFileRun) 338 // reduce a list of FileRuns into a list of PakRuns 339 h2 := h1.reduce(lessFileRun, newPakRun) 340 // sort the list of PakRuns by package 341 h2.sort(lessPakRun) 342 // create a HitList 343 h := make(HitList, len(h2)) 344 for i, p := range h2 { 345 h[i] = p.(*PakRun) 346 } 347 return h 348 } 349 350 // filter returns a new HitList created by filtering 351 // all PakRuns from h that have a matching pakname. 352 func (h HitList) filter(pakname string) HitList { 353 var hh HitList 354 for _, p := range h { 355 if p.Pak.Name == pakname { 356 hh = append(hh, p) 357 } 358 } 359 return hh 360 } 361 362 // ---------------------------------------------------------------------------- 363 // AltWords 364 365 type wordPair struct { 366 canon string // canonical word spelling (all lowercase) 367 alt string // alternative spelling 368 } 369 370 // An AltWords describes a list of alternative spellings for a 371 // canonical (all lowercase) spelling of a word. 372 type AltWords struct { 373 Canon string // canonical word spelling (all lowercase) 374 Alts []string // alternative spelling for the same word 375 } 376 377 // wordPairs are sorted by their canonical spelling. 378 func lessWordPair(x, y interface{}) bool { return x.(*wordPair).canon < y.(*wordPair).canon } 379 380 // newAltWords allocates a new AltWords from the *wordPair run h. 381 func newAltWords(h RunList) interface{} { 382 canon := h[0].(*wordPair).canon 383 alts := make([]string, len(h)) 384 for i, x := range h { 385 alts[i] = x.(*wordPair).alt 386 } 387 return &AltWords{canon, alts} 388 } 389 390 func (a *AltWords) filter(s string) *AltWords { 391 var alts []string 392 for _, w := range a.Alts { 393 if w != s { 394 alts = append(alts, w) 395 } 396 } 397 if len(alts) > 0 { 398 return &AltWords{a.Canon, alts} 399 } 400 return nil 401 } 402 403 // ---------------------------------------------------------------------------- 404 // Indexer 405 406 // Adjust these flags as seems best. 407 const includeMainPackages = true 408 const includeTestFiles = true 409 410 type IndexResult struct { 411 Decls RunList // package-level declarations (with snippets) 412 Others RunList // all other occurrences 413 } 414 415 // Statistics provides statistics information for an index. 416 type Statistics struct { 417 Bytes int // total size of indexed source files 418 Files int // number of indexed source files 419 Lines int // number of lines (all files) 420 Words int // number of different identifiers 421 Spots int // number of identifier occurrences 422 } 423 424 // An Indexer maintains the data structures and provides the machinery 425 // for indexing .go files under a file tree. It implements the path.Visitor 426 // interface for walking file trees, and the ast.Visitor interface for 427 // walking Go ASTs. 428 type Indexer struct { 429 fset *token.FileSet // file set for all indexed files 430 sources bytes.Buffer // concatenated sources 431 packages map[string]*Pak // map of canonicalized *Paks 432 words map[string]*IndexResult // RunLists of Spots 433 snippets []*Snippet // indices are stored in SpotInfos 434 current *token.File // last file added to file set 435 file *File // AST for current file 436 decl ast.Decl // AST for current decl 437 stats Statistics 438 } 439 440 func (x *Indexer) lookupPackage(path, name string) *Pak { 441 // In the source directory tree, more than one package may 442 // live in the same directory. For the packages map, construct 443 // a key that includes both the directory path and the package 444 // name. 445 key := path + ":" + name 446 pak := x.packages[key] 447 if pak == nil { 448 pak = &Pak{path, name} 449 x.packages[key] = pak 450 } 451 return pak 452 } 453 454 func (x *Indexer) addSnippet(s *Snippet) int { 455 index := len(x.snippets) 456 x.snippets = append(x.snippets, s) 457 return index 458 } 459 460 func (x *Indexer) visitIdent(kind SpotKind, id *ast.Ident) { 461 if id != nil { 462 lists, found := x.words[id.Name] 463 if !found { 464 lists = new(IndexResult) 465 x.words[id.Name] = lists 466 } 467 468 if kind == Use || x.decl == nil { 469 // not a declaration or no snippet required 470 info := makeSpotInfo(kind, x.current.Line(id.Pos()), false) 471 lists.Others = append(lists.Others, Spot{x.file, info}) 472 } else { 473 // a declaration with snippet 474 index := x.addSnippet(NewSnippet(x.fset, x.decl, id)) 475 info := makeSpotInfo(kind, index, true) 476 lists.Decls = append(lists.Decls, Spot{x.file, info}) 477 } 478 479 x.stats.Spots++ 480 } 481 } 482 483 func (x *Indexer) visitFieldList(kind SpotKind, list *ast.FieldList) { 484 for _, f := range list.List { 485 x.decl = nil // no snippets for fields 486 for _, name := range f.Names { 487 x.visitIdent(kind, name) 488 } 489 ast.Walk(x, f.Type) 490 // ignore tag - not indexed at the moment 491 } 492 } 493 494 func (x *Indexer) visitSpec(kind SpotKind, spec ast.Spec) { 495 switch n := spec.(type) { 496 case *ast.ImportSpec: 497 x.visitIdent(ImportDecl, n.Name) 498 // ignore path - not indexed at the moment 499 500 case *ast.ValueSpec: 501 for _, n := range n.Names { 502 x.visitIdent(kind, n) 503 } 504 ast.Walk(x, n.Type) 505 for _, v := range n.Values { 506 ast.Walk(x, v) 507 } 508 509 case *ast.TypeSpec: 510 x.visitIdent(TypeDecl, n.Name) 511 ast.Walk(x, n.Type) 512 } 513 } 514 515 func (x *Indexer) visitGenDecl(decl *ast.GenDecl) { 516 kind := VarDecl 517 if decl.Tok == token.CONST { 518 kind = ConstDecl 519 } 520 x.decl = decl 521 for _, s := range decl.Specs { 522 x.visitSpec(kind, s) 523 } 524 } 525 526 func (x *Indexer) Visit(node ast.Node) ast.Visitor { 527 switch n := node.(type) { 528 case nil: 529 // nothing to do 530 531 case *ast.Ident: 532 x.visitIdent(Use, n) 533 534 case *ast.FieldList: 535 x.visitFieldList(VarDecl, n) 536 537 case *ast.InterfaceType: 538 x.visitFieldList(MethodDecl, n.Methods) 539 540 case *ast.DeclStmt: 541 // local declarations should only be *ast.GenDecls; 542 // ignore incorrect ASTs 543 if decl, ok := n.Decl.(*ast.GenDecl); ok { 544 x.decl = nil // no snippets for local declarations 545 x.visitGenDecl(decl) 546 } 547 548 case *ast.GenDecl: 549 x.decl = n 550 x.visitGenDecl(n) 551 552 case *ast.FuncDecl: 553 kind := FuncDecl 554 if n.Recv != nil { 555 kind = MethodDecl 556 ast.Walk(x, n.Recv) 557 } 558 x.decl = n 559 x.visitIdent(kind, n.Name) 560 ast.Walk(x, n.Type) 561 if n.Body != nil { 562 ast.Walk(x, n.Body) 563 } 564 565 case *ast.File: 566 x.decl = nil 567 x.visitIdent(PackageClause, n.Name) 568 for _, d := range n.Decls { 569 ast.Walk(x, d) 570 } 571 572 default: 573 return x 574 } 575 576 return nil 577 } 578 579 func pkgName(filename string) string { 580 // use a new file set each time in order to not pollute the indexer's 581 // file set (which must stay in sync with the concatenated source code) 582 file, err := parser.ParseFile(token.NewFileSet(), filename, nil, parser.PackageClauseOnly) 583 if err != nil || file == nil { 584 return "" 585 } 586 return file.Name.Name 587 } 588 589 // addFile adds a file to the index if possible and returns the file set file 590 // and the file's AST if it was successfully parsed as a Go file. If addFile 591 // failed (that is, if the file was not added), it returns file == nil. 592 func (x *Indexer) addFile(filename string, goFile bool) (file *token.File, ast *ast.File) { 593 // open file 594 f, err := fs.Open(filename) 595 if err != nil { 596 return 597 } 598 defer f.Close() 599 600 // The file set's base offset and x.sources size must be in lock-step; 601 // this permits the direct mapping of suffix array lookup results to 602 // to corresponding Pos values. 603 // 604 // When a file is added to the file set, its offset base increases by 605 // the size of the file + 1; and the initial base offset is 1. Add an 606 // extra byte to the sources here. 607 x.sources.WriteByte(0) 608 609 // If the sources length doesn't match the file set base at this point 610 // the file set implementation changed or we have another error. 611 base := x.fset.Base() 612 if x.sources.Len() != base { 613 panic("internal error: file base incorrect") 614 } 615 616 // append file contents (src) to x.sources 617 if _, err := x.sources.ReadFrom(f); err == nil { 618 src := x.sources.Bytes()[base:] 619 620 if goFile { 621 // parse the file and in the process add it to the file set 622 if ast, err = parser.ParseFile(x.fset, filename, src, parser.ParseComments); err == nil { 623 file = x.fset.File(ast.Pos()) // ast.Pos() is inside the file 624 return 625 } 626 // file has parse errors, and the AST may be incorrect - 627 // set lines information explicitly and index as ordinary 628 // text file (cannot fall through to the text case below 629 // because the file has already been added to the file set 630 // by the parser) 631 file = x.fset.File(token.Pos(base)) // token.Pos(base) is inside the file 632 file.SetLinesForContent(src) 633 ast = nil 634 return 635 } 636 637 if isText(src) { 638 // only add the file to the file set (for the full text index) 639 file = x.fset.AddFile(filename, x.fset.Base(), len(src)) 640 file.SetLinesForContent(src) 641 return 642 } 643 } 644 645 // discard possibly added data 646 x.sources.Truncate(base - 1) // -1 to remove added byte 0 since no file was added 647 return 648 } 649 650 // Design note: Using an explicit white list of permitted files for indexing 651 // makes sure that the important files are included and massively reduces the 652 // number of files to index. The advantage over a blacklist is that unexpected 653 // (non-blacklisted) files won't suddenly explode the index. 654 655 // Files are whitelisted if they have a file name or extension 656 // present as key in whitelisted. 657 var whitelisted = map[string]bool{ 658 ".bash": true, 659 ".c": true, 660 ".css": true, 661 ".go": true, 662 ".goc": true, 663 ".h": true, 664 ".html": true, 665 ".js": true, 666 ".out": true, 667 ".py": true, 668 ".s": true, 669 ".sh": true, 670 ".txt": true, 671 ".xml": true, 672 "AUTHORS": true, 673 "CONTRIBUTORS": true, 674 "LICENSE": true, 675 "Makefile": true, 676 "PATENTS": true, 677 "README": true, 678 } 679 680 // isWhitelisted returns true if a file is on the list 681 // of "permitted" files for indexing. The filename must 682 // be the directory-local name of the file. 683 func isWhitelisted(filename string) bool { 684 key := pathpkg.Ext(filename) 685 if key == "" { 686 // file has no extension - use entire filename 687 key = filename 688 } 689 return whitelisted[key] 690 } 691 692 func (x *Indexer) visitFile(dirname string, f os.FileInfo, fulltextIndex bool) { 693 if f.IsDir() { 694 return 695 } 696 697 filename := pathpkg.Join(dirname, f.Name()) 698 goFile := false 699 700 switch { 701 case isGoFile(f): 702 if !includeTestFiles && (!isPkgFile(f) || strings.HasPrefix(filename, "test/")) { 703 return 704 } 705 if !includeMainPackages && pkgName(filename) == "main" { 706 return 707 } 708 goFile = true 709 710 case !fulltextIndex || !isWhitelisted(f.Name()): 711 return 712 } 713 714 file, fast := x.addFile(filename, goFile) 715 if file == nil { 716 return // addFile failed 717 } 718 719 if fast != nil { 720 // we've got a Go file to index 721 x.current = file 722 pak := x.lookupPackage(dirname, fast.Name.Name) 723 x.file = &File{f.Name(), pak} 724 ast.Walk(x, fast) 725 } 726 727 // update statistics 728 x.stats.Bytes += file.Size() 729 x.stats.Files++ 730 x.stats.Lines += file.LineCount() 731 } 732 733 // ---------------------------------------------------------------------------- 734 // Index 735 736 type LookupResult struct { 737 Decls HitList // package-level declarations (with snippets) 738 Others HitList // all other occurrences 739 } 740 741 type Index struct { 742 fset *token.FileSet // file set used during indexing; nil if no textindex 743 suffixes *suffixarray.Index // suffixes for concatenated sources; nil if no textindex 744 words map[string]*LookupResult // maps words to hit lists 745 alts map[string]*AltWords // maps canonical(words) to lists of alternative spellings 746 snippets []*Snippet // all snippets, indexed by snippet index 747 stats Statistics 748 } 749 750 func canonical(w string) string { return strings.ToLower(w) } 751 752 // NewIndex creates a new index for the .go files 753 // in the directories given by dirnames. 754 // 755 func NewIndex(dirnames <-chan string, fulltextIndex bool, throttle float64) *Index { 756 var x Indexer 757 th := NewThrottle(throttle, 100*time.Millisecond) // run at least 0.1s at a time 758 759 // initialize Indexer 760 // (use some reasonably sized maps to start) 761 x.fset = token.NewFileSet() 762 x.packages = make(map[string]*Pak, 256) 763 x.words = make(map[string]*IndexResult, 8192) 764 765 // index all files in the directories given by dirnames 766 for dirname := range dirnames { 767 list, err := fs.ReadDir(dirname) 768 if err != nil { 769 continue // ignore this directory 770 } 771 for _, f := range list { 772 if !f.IsDir() { 773 x.visitFile(dirname, f, fulltextIndex) 774 } 775 th.Throttle() 776 } 777 } 778 779 if !fulltextIndex { 780 // the file set, the current file, and the sources are 781 // not needed after indexing if no text index is built - 782 // help GC and clear them 783 x.fset = nil 784 x.sources.Reset() 785 x.current = nil // contains reference to fset! 786 } 787 788 // for each word, reduce the RunLists into a LookupResult; 789 // also collect the word with its canonical spelling in a 790 // word list for later computation of alternative spellings 791 words := make(map[string]*LookupResult) 792 var wlist RunList 793 for w, h := range x.words { 794 decls := reduce(h.Decls) 795 others := reduce(h.Others) 796 words[w] = &LookupResult{ 797 Decls: decls, 798 Others: others, 799 } 800 wlist = append(wlist, &wordPair{canonical(w), w}) 801 th.Throttle() 802 } 803 x.stats.Words = len(words) 804 805 // reduce the word list {canonical(w), w} into 806 // a list of AltWords runs {canonical(w), {w}} 807 alist := wlist.reduce(lessWordPair, newAltWords) 808 809 // convert alist into a map of alternative spellings 810 alts := make(map[string]*AltWords) 811 for i := 0; i < len(alist); i++ { 812 a := alist[i].(*AltWords) 813 alts[a.Canon] = a 814 } 815 816 // create text index 817 var suffixes *suffixarray.Index 818 if fulltextIndex { 819 suffixes = suffixarray.New(x.sources.Bytes()) 820 } 821 822 return &Index{x.fset, suffixes, words, alts, x.snippets, x.stats} 823 } 824 825 type fileIndex struct { 826 Words map[string]*LookupResult 827 Alts map[string]*AltWords 828 Snippets []*Snippet 829 Fulltext bool 830 } 831 832 func (x *fileIndex) Write(w io.Writer) error { 833 return gob.NewEncoder(w).Encode(x) 834 } 835 836 func (x *fileIndex) Read(r io.Reader) error { 837 return gob.NewDecoder(r).Decode(x) 838 } 839 840 // Write writes the index x to w. 841 func (x *Index) Write(w io.Writer) error { 842 fulltext := false 843 if x.suffixes != nil { 844 fulltext = true 845 } 846 fx := fileIndex{ 847 x.words, 848 x.alts, 849 x.snippets, 850 fulltext, 851 } 852 if err := fx.Write(w); err != nil { 853 return err 854 } 855 if fulltext { 856 encode := func(x interface{}) error { 857 return gob.NewEncoder(w).Encode(x) 858 } 859 if err := x.fset.Write(encode); err != nil { 860 return err 861 } 862 if err := x.suffixes.Write(w); err != nil { 863 return err 864 } 865 } 866 return nil 867 } 868 869 // Read reads the index from r into x; x must not be nil. 870 // If r does not also implement io.ByteReader, it will be wrapped in a bufio.Reader. 871 func (x *Index) Read(r io.Reader) error { 872 // We use the ability to read bytes as a plausible surrogate for buffering. 873 if _, ok := r.(io.ByteReader); !ok { 874 r = bufio.NewReader(r) 875 } 876 var fx fileIndex 877 if err := fx.Read(r); err != nil { 878 return err 879 } 880 x.words = fx.Words 881 x.alts = fx.Alts 882 x.snippets = fx.Snippets 883 if fx.Fulltext { 884 x.fset = token.NewFileSet() 885 decode := func(x interface{}) error { 886 return gob.NewDecoder(r).Decode(x) 887 } 888 if err := x.fset.Read(decode); err != nil { 889 return err 890 } 891 x.suffixes = new(suffixarray.Index) 892 if err := x.suffixes.Read(r); err != nil { 893 return err 894 } 895 } 896 return nil 897 } 898 899 // Stats() returns index statistics. 900 func (x *Index) Stats() Statistics { 901 return x.stats 902 } 903 904 func (x *Index) lookupWord(w string) (match *LookupResult, alt *AltWords) { 905 match = x.words[w] 906 alt = x.alts[canonical(w)] 907 // remove current spelling from alternatives 908 // (if there is no match, the alternatives do 909 // not contain the current spelling) 910 if match != nil && alt != nil { 911 alt = alt.filter(w) 912 } 913 return 914 } 915 916 // isIdentifier reports whether s is a Go identifier. 917 func isIdentifier(s string) bool { 918 for i, ch := range s { 919 if unicode.IsLetter(ch) || ch == ' ' || i > 0 && unicode.IsDigit(ch) { 920 continue 921 } 922 return false 923 } 924 return len(s) > 0 925 } 926 927 // For a given query, which is either a single identifier or a qualified 928 // identifier, Lookup returns a list of packages, a LookupResult, and a 929 // list of alternative spellings, if any. Any and all results may be nil. 930 // If the query syntax is wrong, an error is reported. 931 func (x *Index) Lookup(query string) (paks HitList, match *LookupResult, alt *AltWords, err error) { 932 ss := strings.Split(query, ".") 933 934 // check query syntax 935 for _, s := range ss { 936 if !isIdentifier(s) { 937 err = errors.New("all query parts must be identifiers") 938 return 939 } 940 } 941 942 // handle simple and qualified identifiers 943 switch len(ss) { 944 case 1: 945 ident := ss[0] 946 match, alt = x.lookupWord(ident) 947 if match != nil { 948 // found a match - filter packages with same name 949 // for the list of packages called ident, if any 950 paks = match.Others.filter(ident) 951 } 952 953 case 2: 954 pakname, ident := ss[0], ss[1] 955 match, alt = x.lookupWord(ident) 956 if match != nil { 957 // found a match - filter by package name 958 // (no paks - package names are not qualified) 959 decls := match.Decls.filter(pakname) 960 others := match.Others.filter(pakname) 961 match = &LookupResult{decls, others} 962 } 963 964 default: 965 err = errors.New("query is not a (qualified) identifier") 966 } 967 968 return 969 } 970 971 func (x *Index) Snippet(i int) *Snippet { 972 // handle illegal snippet indices gracefully 973 if 0 <= i && i < len(x.snippets) { 974 return x.snippets[i] 975 } 976 return nil 977 } 978 979 type positionList []struct { 980 filename string 981 line int 982 } 983 984 func (list positionList) Len() int { return len(list) } 985 func (list positionList) Less(i, j int) bool { return list[i].filename < list[j].filename } 986 func (list positionList) Swap(i, j int) { list[i], list[j] = list[j], list[i] } 987 988 // unique returns the list sorted and with duplicate entries removed 989 func unique(list []int) []int { 990 sort.Ints(list) 991 var last int 992 i := 0 993 for _, x := range list { 994 if i == 0 || x != last { 995 last = x 996 list[i] = x 997 i++ 998 } 999 } 1000 return list[0:i] 1001 } 1002 1003 // A FileLines value specifies a file and line numbers within that file. 1004 type FileLines struct { 1005 Filename string 1006 Lines []int 1007 } 1008 1009 // LookupRegexp returns the number of matches and the matches where a regular 1010 // expression r is found in the full text index. At most n matches are 1011 // returned (thus found <= n). 1012 // 1013 func (x *Index) LookupRegexp(r *regexp.Regexp, n int) (found int, result []FileLines) { 1014 if x.suffixes == nil || n <= 0 { 1015 return 1016 } 1017 // n > 0 1018 1019 var list positionList 1020 // FindAllIndex may returns matches that span across file boundaries. 1021 // Such matches are unlikely, buf after eliminating them we may end up 1022 // with fewer than n matches. If we don't have enough at the end, redo 1023 // the search with an increased value n1, but only if FindAllIndex 1024 // returned all the requested matches in the first place (if it 1025 // returned fewer than that there cannot be more). 1026 for n1 := n; found < n; n1 += n - found { 1027 found = 0 1028 matches := x.suffixes.FindAllIndex(r, n1) 1029 // compute files, exclude matches that span file boundaries, 1030 // and map offsets to file-local offsets 1031 list = make(positionList, len(matches)) 1032 for _, m := range matches { 1033 // by construction, an offset corresponds to the Pos value 1034 // for the file set - use it to get the file and line 1035 p := token.Pos(m[0]) 1036 if file := x.fset.File(p); file != nil { 1037 if base := file.Base(); base <= m[1] && m[1] <= base+file.Size() { 1038 // match [m[0], m[1]) is within the file boundaries 1039 list[found].filename = file.Name() 1040 list[found].line = file.Line(p) 1041 found++ 1042 } 1043 } 1044 } 1045 if found == n || len(matches) < n1 { 1046 // found all matches or there's no chance to find more 1047 break 1048 } 1049 } 1050 list = list[0:found] 1051 sort.Sort(list) // sort by filename 1052 1053 // collect matches belonging to the same file 1054 var last string 1055 var lines []int 1056 addLines := func() { 1057 if len(lines) > 0 { 1058 // remove duplicate lines 1059 result = append(result, FileLines{last, unique(lines)}) 1060 lines = nil 1061 } 1062 } 1063 for _, m := range list { 1064 if m.filename != last { 1065 addLines() 1066 last = m.filename 1067 } 1068 lines = append(lines, m.line) 1069 } 1070 addLines() 1071 1072 return 1073 }