golang.org/x/tools@v0.21.0/godoc/index.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // This file contains the infrastructure to create an 6 // identifier and full-text index for a set of Go files. 7 // 8 // Algorithm for identifier index: 9 // - traverse all .go files of the file tree specified by root 10 // - for each identifier (word) encountered, collect all occurrences (spots) 11 // into a list; this produces a list of spots for each word 12 // - reduce the lists: from a list of spots to a list of FileRuns, 13 // and from a list of FileRuns into a list of PakRuns 14 // - make a HitList from the PakRuns 15 // 16 // Details: 17 // - keep two lists per word: one containing package-level declarations 18 // that have snippets, and one containing all other spots 19 // - keep the snippets in a separate table indexed by snippet index 20 // and store the snippet index in place of the line number in a SpotInfo 21 // (the line number for spots with snippets is stored in the snippet) 22 // - at the end, create lists of alternative spellings for a given 23 // word 24 // 25 // Algorithm for full text index: 26 // - concatenate all source code in a byte buffer (in memory) 27 // - add the files to a file set in lockstep as they are added to the byte 28 // buffer such that a byte buffer offset corresponds to the Pos value for 29 // that file location 30 // - create a suffix array from the concatenated sources 31 // 32 // String lookup in full text index: 33 // - use the suffix array to lookup a string's offsets - the offsets 34 // correspond to the Pos values relative to the file set 35 // - translate the Pos values back into file and line information and 36 // sort the result 37 38 package godoc 39 40 import ( 41 "bufio" 42 "bytes" 43 "encoding/gob" 44 "errors" 45 "fmt" 46 "go/ast" 47 "go/doc" 48 "go/parser" 49 "go/token" 50 "index/suffixarray" 51 "io" 52 "log" 53 "math" 54 "os" 55 pathpkg "path" 56 "path/filepath" 57 "regexp" 58 "runtime" 59 "sort" 60 "strconv" 61 "strings" 62 "sync" 63 "time" 64 "unicode" 65 66 "golang.org/x/tools/godoc/util" 67 "golang.org/x/tools/godoc/vfs" 68 ) 69 70 // ---------------------------------------------------------------------------- 71 // InterfaceSlice is a helper type for sorting interface 72 // slices according to some slice-specific sort criteria. 73 74 type comparer func(x, y interface{}) bool 75 76 type interfaceSlice struct { 77 slice []interface{} 78 less comparer 79 } 80 81 // ---------------------------------------------------------------------------- 82 // RunList 83 84 // A RunList is a list of entries that can be sorted according to some 85 // criteria. A RunList may be compressed by grouping "runs" of entries 86 // which are equal (according to the sort criteria) into a new RunList of 87 // runs. For instance, a RunList containing pairs (x, y) may be compressed 88 // into a RunList containing pair runs (x, {y}) where each run consists of 89 // a list of y's with the same x. 90 type RunList []interface{} 91 92 func (h RunList) sort(less comparer) { 93 sort.Sort(&interfaceSlice{h, less}) 94 } 95 96 func (p *interfaceSlice) Len() int { return len(p.slice) } 97 func (p *interfaceSlice) Less(i, j int) bool { return p.less(p.slice[i], p.slice[j]) } 98 func (p *interfaceSlice) Swap(i, j int) { p.slice[i], p.slice[j] = p.slice[j], p.slice[i] } 99 100 // Compress entries which are the same according to a sort criteria 101 // (specified by less) into "runs". 102 func (h RunList) reduce(less comparer, newRun func(h RunList) interface{}) RunList { 103 if len(h) == 0 { 104 return nil 105 } 106 // len(h) > 0 107 108 // create runs of entries with equal values 109 h.sort(less) 110 111 // for each run, make a new run object and collect them in a new RunList 112 var hh RunList 113 i, x := 0, h[0] 114 for j, y := range h { 115 if less(x, y) { 116 hh = append(hh, newRun(h[i:j])) 117 i, x = j, h[j] // start a new run 118 } 119 } 120 // add final run, if any 121 if i < len(h) { 122 hh = append(hh, newRun(h[i:])) 123 } 124 125 return hh 126 } 127 128 // ---------------------------------------------------------------------------- 129 // KindRun 130 131 // Debugging support. Disable to see multiple entries per line. 132 const removeDuplicates = true 133 134 // A KindRun is a run of SpotInfos of the same kind in a given file. 135 // The kind (3 bits) is stored in each SpotInfo element; to find the 136 // kind of a KindRun, look at any of its elements. 137 type KindRun []SpotInfo 138 139 // KindRuns are sorted by line number or index. Since the isIndex bit 140 // is always the same for all infos in one list we can compare lori's. 141 func (k KindRun) Len() int { return len(k) } 142 func (k KindRun) Less(i, j int) bool { return k[i].Lori() < k[j].Lori() } 143 func (k KindRun) Swap(i, j int) { k[i], k[j] = k[j], k[i] } 144 145 // FileRun contents are sorted by Kind for the reduction into KindRuns. 146 func lessKind(x, y interface{}) bool { return x.(SpotInfo).Kind() < y.(SpotInfo).Kind() } 147 148 // newKindRun allocates a new KindRun from the SpotInfo run h. 149 func newKindRun(h RunList) interface{} { 150 run := make(KindRun, len(h)) 151 for i, x := range h { 152 run[i] = x.(SpotInfo) 153 } 154 155 // Spots were sorted by file and kind to create this run. 156 // Within this run, sort them by line number or index. 157 sort.Sort(run) 158 159 if removeDuplicates { 160 // Since both the lori and kind field must be 161 // same for duplicates, and since the isIndex 162 // bit is always the same for all infos in one 163 // list we can simply compare the entire info. 164 k := 0 165 prev := SpotInfo(math.MaxUint32) // an unlikely value 166 for _, x := range run { 167 if x != prev { 168 run[k] = x 169 k++ 170 prev = x 171 } 172 } 173 run = run[0:k] 174 } 175 176 return run 177 } 178 179 // ---------------------------------------------------------------------------- 180 // FileRun 181 182 // A Pak describes a Go package. 183 type Pak struct { 184 Path string // path of directory containing the package 185 Name string // package name as declared by package clause 186 } 187 188 // Paks are sorted by name (primary key) and by import path (secondary key). 189 func (p *Pak) less(q *Pak) bool { 190 return p.Name < q.Name || p.Name == q.Name && p.Path < q.Path 191 } 192 193 // A File describes a Go file. 194 type File struct { 195 Name string // directory-local file name 196 Pak *Pak // the package to which the file belongs 197 } 198 199 // Path returns the file path of f. 200 func (f *File) Path() string { 201 return pathpkg.Join(f.Pak.Path, f.Name) 202 } 203 204 // A Spot describes a single occurrence of a word. 205 type Spot struct { 206 File *File 207 Info SpotInfo 208 } 209 210 // A FileRun is a list of KindRuns belonging to the same file. 211 type FileRun struct { 212 File *File 213 Groups []KindRun 214 } 215 216 // Spots are sorted by file path for the reduction into FileRuns. 217 func lessSpot(x, y interface{}) bool { 218 fx := x.(Spot).File 219 fy := y.(Spot).File 220 // same as "return fx.Path() < fy.Path()" but w/o computing the file path first 221 px := fx.Pak.Path 222 py := fy.Pak.Path 223 return px < py || px == py && fx.Name < fy.Name 224 } 225 226 // newFileRun allocates a new FileRun from the Spot run h. 227 func newFileRun(h RunList) interface{} { 228 file := h[0].(Spot).File 229 230 // reduce the list of Spots into a list of KindRuns 231 h1 := make(RunList, len(h)) 232 for i, x := range h { 233 h1[i] = x.(Spot).Info 234 } 235 h2 := h1.reduce(lessKind, newKindRun) 236 237 // create the FileRun 238 groups := make([]KindRun, len(h2)) 239 for i, x := range h2 { 240 groups[i] = x.(KindRun) 241 } 242 return &FileRun{file, groups} 243 } 244 245 // ---------------------------------------------------------------------------- 246 // PakRun 247 248 // A PakRun describes a run of *FileRuns of a package. 249 type PakRun struct { 250 Pak *Pak 251 Files []*FileRun 252 } 253 254 // Sorting support for files within a PakRun. 255 func (p *PakRun) Len() int { return len(p.Files) } 256 func (p *PakRun) Less(i, j int) bool { return p.Files[i].File.Name < p.Files[j].File.Name } 257 func (p *PakRun) Swap(i, j int) { p.Files[i], p.Files[j] = p.Files[j], p.Files[i] } 258 259 // FileRuns are sorted by package for the reduction into PakRuns. 260 func lessFileRun(x, y interface{}) bool { 261 return x.(*FileRun).File.Pak.less(y.(*FileRun).File.Pak) 262 } 263 264 // newPakRun allocates a new PakRun from the *FileRun run h. 265 func newPakRun(h RunList) interface{} { 266 pak := h[0].(*FileRun).File.Pak 267 files := make([]*FileRun, len(h)) 268 for i, x := range h { 269 files[i] = x.(*FileRun) 270 } 271 run := &PakRun{pak, files} 272 sort.Sort(run) // files were sorted by package; sort them by file now 273 return run 274 } 275 276 // ---------------------------------------------------------------------------- 277 // HitList 278 279 // A HitList describes a list of PakRuns. 280 type HitList []*PakRun 281 282 // PakRuns are sorted by package. 283 func lessPakRun(x, y interface{}) bool { return x.(*PakRun).Pak.less(y.(*PakRun).Pak) } 284 285 func reduce(h0 RunList) HitList { 286 // reduce a list of Spots into a list of FileRuns 287 h1 := h0.reduce(lessSpot, newFileRun) 288 // reduce a list of FileRuns into a list of PakRuns 289 h2 := h1.reduce(lessFileRun, newPakRun) 290 // sort the list of PakRuns by package 291 h2.sort(lessPakRun) 292 // create a HitList 293 h := make(HitList, len(h2)) 294 for i, p := range h2 { 295 h[i] = p.(*PakRun) 296 } 297 return h 298 } 299 300 // filter returns a new HitList created by filtering 301 // all PakRuns from h that have a matching pakname. 302 func (h HitList) filter(pakname string) HitList { 303 var hh HitList 304 for _, p := range h { 305 if p.Pak.Name == pakname { 306 hh = append(hh, p) 307 } 308 } 309 return hh 310 } 311 312 // ---------------------------------------------------------------------------- 313 // AltWords 314 315 type wordPair struct { 316 canon string // canonical word spelling (all lowercase) 317 alt string // alternative spelling 318 } 319 320 // An AltWords describes a list of alternative spellings for a 321 // canonical (all lowercase) spelling of a word. 322 type AltWords struct { 323 Canon string // canonical word spelling (all lowercase) 324 Alts []string // alternative spelling for the same word 325 } 326 327 // wordPairs are sorted by their canonical spelling. 328 func lessWordPair(x, y interface{}) bool { return x.(*wordPair).canon < y.(*wordPair).canon } 329 330 // newAltWords allocates a new AltWords from the *wordPair run h. 331 func newAltWords(h RunList) interface{} { 332 canon := h[0].(*wordPair).canon 333 alts := make([]string, len(h)) 334 for i, x := range h { 335 alts[i] = x.(*wordPair).alt 336 } 337 return &AltWords{canon, alts} 338 } 339 340 func (a *AltWords) filter(s string) *AltWords { 341 var alts []string 342 for _, w := range a.Alts { 343 if w != s { 344 alts = append(alts, w) 345 } 346 } 347 if len(alts) > 0 { 348 return &AltWords{a.Canon, alts} 349 } 350 return nil 351 } 352 353 // Ident stores information about external identifiers in order to create 354 // links to package documentation. 355 type Ident struct { 356 Path string // e.g. "net/http" 357 Package string // e.g. "http" 358 Name string // e.g. "NewRequest" 359 Doc string // e.g. "NewRequest returns a new Request..." 360 } 361 362 // byImportCount sorts the given slice of Idents by the import 363 // counts of the packages to which they belong. 364 type byImportCount struct { 365 Idents []Ident 366 ImportCount map[string]int 367 } 368 369 func (ic byImportCount) Len() int { 370 return len(ic.Idents) 371 } 372 373 func (ic byImportCount) Less(i, j int) bool { 374 ri := ic.ImportCount[ic.Idents[i].Path] 375 rj := ic.ImportCount[ic.Idents[j].Path] 376 if ri == rj { 377 return ic.Idents[i].Path < ic.Idents[j].Path 378 } 379 return ri > rj 380 } 381 382 func (ic byImportCount) Swap(i, j int) { 383 ic.Idents[i], ic.Idents[j] = ic.Idents[j], ic.Idents[i] 384 } 385 386 func (ic byImportCount) String() string { 387 buf := bytes.NewBuffer([]byte("[")) 388 for _, v := range ic.Idents { 389 buf.WriteString(fmt.Sprintf("\n\t%s, %s (%d)", v.Path, v.Name, ic.ImportCount[v.Path])) 390 } 391 buf.WriteString("\n]") 392 return buf.String() 393 } 394 395 // filter creates a new Ident list where the results match the given 396 // package name. 397 func (ic byImportCount) filter(pakname string) []Ident { 398 if ic.Idents == nil { 399 return nil 400 } 401 var res []Ident 402 for _, i := range ic.Idents { 403 if i.Package == pakname { 404 res = append(res, i) 405 } 406 } 407 return res 408 } 409 410 // top returns the top n identifiers. 411 func (ic byImportCount) top(n int) []Ident { 412 if len(ic.Idents) > n { 413 return ic.Idents[:n] 414 } 415 return ic.Idents 416 } 417 418 // ---------------------------------------------------------------------------- 419 // Indexer 420 421 type IndexResult struct { 422 Decls RunList // package-level declarations (with snippets) 423 Others RunList // all other occurrences 424 } 425 426 // Statistics provides statistics information for an index. 427 type Statistics struct { 428 Bytes int // total size of indexed source files 429 Files int // number of indexed source files 430 Lines int // number of lines (all files) 431 Words int // number of different identifiers 432 Spots int // number of identifier occurrences 433 } 434 435 // An Indexer maintains the data structures and provides the machinery 436 // for indexing .go files under a file tree. It implements the path.Visitor 437 // interface for walking file trees, and the ast.Visitor interface for 438 // walking Go ASTs. 439 type Indexer struct { 440 c *Corpus 441 fset *token.FileSet // file set for all indexed files 442 fsOpenGate chan bool // send pre fs.Open; receive on close 443 444 mu sync.Mutex // guards all the following 445 sources bytes.Buffer // concatenated sources 446 strings map[string]string // interned string 447 packages map[Pak]*Pak // interned *Paks 448 words map[string]*IndexResult // RunLists of Spots 449 snippets []*Snippet // indices are stored in SpotInfos 450 current *token.File // last file added to file set 451 file *File // AST for current file 452 decl ast.Decl // AST for current decl 453 stats Statistics 454 throttle *util.Throttle 455 importCount map[string]int // package path ("net/http") => count 456 packagePath map[string]map[string]bool // "template" => "text/template" => true 457 exports map[string]map[string]SpotKind // "net/http" => "ListenAndServe" => FuncDecl 458 curPkgExports map[string]SpotKind 459 idents map[SpotKind]map[string][]Ident // kind => name => list of Idents 460 } 461 462 func (x *Indexer) intern(s string) string { 463 if s, ok := x.strings[s]; ok { 464 return s 465 } 466 x.strings[s] = s 467 return s 468 } 469 470 func (x *Indexer) lookupPackage(path, name string) *Pak { 471 // In the source directory tree, more than one package may 472 // live in the same directory. For the packages map, construct 473 // a key that includes both the directory path and the package 474 // name. 475 key := Pak{Path: x.intern(path), Name: x.intern(name)} 476 pak := x.packages[key] 477 if pak == nil { 478 pak = &key 479 x.packages[key] = pak 480 } 481 return pak 482 } 483 484 func (x *Indexer) addSnippet(s *Snippet) int { 485 index := len(x.snippets) 486 x.snippets = append(x.snippets, s) 487 return index 488 } 489 490 func (x *Indexer) visitIdent(kind SpotKind, id *ast.Ident) { 491 if id == nil { 492 return 493 } 494 name := x.intern(id.Name) 495 496 switch kind { 497 case TypeDecl, FuncDecl, ConstDecl, VarDecl: 498 x.curPkgExports[name] = kind 499 } 500 501 lists, found := x.words[name] 502 if !found { 503 lists = new(IndexResult) 504 x.words[name] = lists 505 } 506 507 if kind == Use || x.decl == nil { 508 if x.c.IndexGoCode { 509 // not a declaration or no snippet required 510 info := makeSpotInfo(kind, x.current.Line(id.Pos()), false) 511 lists.Others = append(lists.Others, Spot{x.file, info}) 512 } 513 } else { 514 // a declaration with snippet 515 index := x.addSnippet(NewSnippet(x.fset, x.decl, id)) 516 info := makeSpotInfo(kind, index, true) 517 lists.Decls = append(lists.Decls, Spot{x.file, info}) 518 } 519 520 x.stats.Spots++ 521 } 522 523 func (x *Indexer) visitFieldList(kind SpotKind, flist *ast.FieldList) { 524 for _, f := range flist.List { 525 x.decl = nil // no snippets for fields 526 for _, name := range f.Names { 527 x.visitIdent(kind, name) 528 } 529 ast.Walk(x, f.Type) 530 // ignore tag - not indexed at the moment 531 } 532 } 533 534 func (x *Indexer) visitSpec(kind SpotKind, spec ast.Spec) { 535 switch n := spec.(type) { 536 case *ast.ImportSpec: 537 x.visitIdent(ImportDecl, n.Name) 538 if n.Path != nil { 539 if imp, err := strconv.Unquote(n.Path.Value); err == nil { 540 x.importCount[x.intern(imp)]++ 541 } 542 } 543 544 case *ast.ValueSpec: 545 for _, n := range n.Names { 546 x.visitIdent(kind, n) 547 } 548 ast.Walk(x, n.Type) 549 for _, v := range n.Values { 550 ast.Walk(x, v) 551 } 552 553 case *ast.TypeSpec: 554 x.visitIdent(TypeDecl, n.Name) 555 ast.Walk(x, n.Type) 556 } 557 } 558 559 func (x *Indexer) visitGenDecl(decl *ast.GenDecl) { 560 kind := VarDecl 561 if decl.Tok == token.CONST { 562 kind = ConstDecl 563 } 564 x.decl = decl 565 for _, s := range decl.Specs { 566 x.visitSpec(kind, s) 567 } 568 } 569 570 func (x *Indexer) Visit(node ast.Node) ast.Visitor { 571 switch n := node.(type) { 572 case nil: 573 // nothing to do 574 575 case *ast.Ident: 576 x.visitIdent(Use, n) 577 578 case *ast.FieldList: 579 x.visitFieldList(VarDecl, n) 580 581 case *ast.InterfaceType: 582 x.visitFieldList(MethodDecl, n.Methods) 583 584 case *ast.DeclStmt: 585 // local declarations should only be *ast.GenDecls; 586 // ignore incorrect ASTs 587 if decl, ok := n.Decl.(*ast.GenDecl); ok { 588 x.decl = nil // no snippets for local declarations 589 x.visitGenDecl(decl) 590 } 591 592 case *ast.GenDecl: 593 x.decl = n 594 x.visitGenDecl(n) 595 596 case *ast.FuncDecl: 597 kind := FuncDecl 598 if n.Recv != nil { 599 kind = MethodDecl 600 ast.Walk(x, n.Recv) 601 } 602 x.decl = n 603 x.visitIdent(kind, n.Name) 604 ast.Walk(x, n.Type) 605 if n.Body != nil { 606 ast.Walk(x, n.Body) 607 } 608 609 case *ast.File: 610 x.decl = nil 611 x.visitIdent(PackageClause, n.Name) 612 for _, d := range n.Decls { 613 ast.Walk(x, d) 614 } 615 616 default: 617 return x 618 } 619 620 return nil 621 } 622 623 // addFile adds a file to the index if possible and returns the file set file 624 // and the file's AST if it was successfully parsed as a Go file. If addFile 625 // failed (that is, if the file was not added), it returns file == nil. 626 func (x *Indexer) addFile(f vfs.ReadSeekCloser, filename string, goFile bool) (file *token.File, ast *ast.File) { 627 defer f.Close() 628 629 // The file set's base offset and x.sources size must be in lock-step; 630 // this permits the direct mapping of suffix array lookup results to 631 // corresponding Pos values. 632 // 633 // When a file is added to the file set, its offset base increases by 634 // the size of the file + 1; and the initial base offset is 1. Add an 635 // extra byte to the sources here. 636 x.sources.WriteByte(0) 637 638 // If the sources length doesn't match the file set base at this point 639 // the file set implementation changed or we have another error. 640 base := x.fset.Base() 641 if x.sources.Len() != base { 642 panic("internal error: file base incorrect") 643 } 644 645 // append file contents (src) to x.sources 646 if _, err := x.sources.ReadFrom(f); err == nil { 647 src := x.sources.Bytes()[base:] 648 649 if goFile { 650 // parse the file and in the process add it to the file set 651 if ast, err = parser.ParseFile(x.fset, filename, src, parser.ParseComments); err == nil { 652 file = x.fset.File(ast.Pos()) // ast.Pos() is inside the file 653 return 654 } 655 // file has parse errors, and the AST may be incorrect - 656 // set lines information explicitly and index as ordinary 657 // text file (cannot fall through to the text case below 658 // because the file has already been added to the file set 659 // by the parser) 660 file = x.fset.File(token.Pos(base)) // token.Pos(base) is inside the file 661 file.SetLinesForContent(src) 662 ast = nil 663 return 664 } 665 666 if util.IsText(src) { 667 // only add the file to the file set (for the full text index) 668 file = x.fset.AddFile(filename, x.fset.Base(), len(src)) 669 file.SetLinesForContent(src) 670 return 671 } 672 } 673 674 // discard possibly added data 675 x.sources.Truncate(base - 1) // -1 to remove added byte 0 since no file was added 676 return 677 } 678 679 // Design note: Using an explicit white list of permitted files for indexing 680 // makes sure that the important files are included and massively reduces the 681 // number of files to index. The advantage over a blacklist is that unexpected 682 // (non-blacklisted) files won't suddenly explode the index. 683 684 // Files are whitelisted if they have a file name or extension 685 // present as key in whitelisted. 686 var whitelisted = map[string]bool{ 687 ".bash": true, 688 ".c": true, 689 ".cc": true, 690 ".cpp": true, 691 ".cxx": true, 692 ".css": true, 693 ".go": true, 694 ".goc": true, 695 ".h": true, 696 ".hh": true, 697 ".hpp": true, 698 ".hxx": true, 699 ".html": true, 700 ".js": true, 701 ".out": true, 702 ".py": true, 703 ".s": true, 704 ".sh": true, 705 ".txt": true, 706 ".xml": true, 707 "AUTHORS": true, 708 "CONTRIBUTORS": true, 709 "LICENSE": true, 710 "Makefile": true, 711 "PATENTS": true, 712 "README": true, 713 } 714 715 // isWhitelisted returns true if a file is on the list 716 // of "permitted" files for indexing. The filename must 717 // be the directory-local name of the file. 718 func isWhitelisted(filename string) bool { 719 key := pathpkg.Ext(filename) 720 if key == "" { 721 // file has no extension - use entire filename 722 key = filename 723 } 724 return whitelisted[key] 725 } 726 727 func (x *Indexer) indexDocs(dirname string, filename string, astFile *ast.File) { 728 pkgName := x.intern(astFile.Name.Name) 729 if pkgName == "main" { 730 return 731 } 732 pkgPath := x.intern(strings.TrimPrefix(strings.TrimPrefix(dirname, "/src/"), "pkg/")) 733 astPkg := ast.Package{ 734 Name: pkgName, 735 Files: map[string]*ast.File{ 736 filename: astFile, 737 }, 738 } 739 var m doc.Mode 740 docPkg := doc.New(&astPkg, dirname, m) 741 addIdent := func(sk SpotKind, name string, docstr string) { 742 if x.idents[sk] == nil { 743 x.idents[sk] = make(map[string][]Ident) 744 } 745 name = x.intern(name) 746 x.idents[sk][name] = append(x.idents[sk][name], Ident{ 747 Path: pkgPath, 748 Package: pkgName, 749 Name: name, 750 Doc: doc.Synopsis(docstr), 751 }) 752 } 753 754 if x.idents[PackageClause] == nil { 755 x.idents[PackageClause] = make(map[string][]Ident) 756 } 757 // List of words under which the package identifier will be stored. 758 // This includes the package name and the components of the directory 759 // in which it resides. 760 words := strings.Split(pathpkg.Dir(pkgPath), "/") 761 if words[0] == "." { 762 words = []string{} 763 } 764 name := x.intern(docPkg.Name) 765 synopsis := doc.Synopsis(docPkg.Doc) 766 words = append(words, name) 767 pkgIdent := Ident{ 768 Path: pkgPath, 769 Package: pkgName, 770 Name: name, 771 Doc: synopsis, 772 } 773 for _, word := range words { 774 word = x.intern(word) 775 found := false 776 pkgs := x.idents[PackageClause][word] 777 for i, p := range pkgs { 778 if p.Path == pkgPath { 779 if docPkg.Doc != "" { 780 p.Doc = synopsis 781 pkgs[i] = p 782 } 783 found = true 784 break 785 } 786 } 787 if !found { 788 x.idents[PackageClause][word] = append(x.idents[PackageClause][word], pkgIdent) 789 } 790 } 791 792 for _, c := range docPkg.Consts { 793 for _, name := range c.Names { 794 addIdent(ConstDecl, name, c.Doc) 795 } 796 } 797 for _, t := range docPkg.Types { 798 addIdent(TypeDecl, t.Name, t.Doc) 799 for _, c := range t.Consts { 800 for _, name := range c.Names { 801 addIdent(ConstDecl, name, c.Doc) 802 } 803 } 804 for _, v := range t.Vars { 805 for _, name := range v.Names { 806 addIdent(VarDecl, name, v.Doc) 807 } 808 } 809 for _, f := range t.Funcs { 810 addIdent(FuncDecl, f.Name, f.Doc) 811 } 812 for _, f := range t.Methods { 813 addIdent(MethodDecl, f.Name, f.Doc) 814 // Change the name of methods to be "<typename>.<methodname>". 815 // They will still be indexed as <methodname>. 816 idents := x.idents[MethodDecl][f.Name] 817 idents[len(idents)-1].Name = x.intern(t.Name + "." + f.Name) 818 } 819 } 820 for _, v := range docPkg.Vars { 821 for _, name := range v.Names { 822 addIdent(VarDecl, name, v.Doc) 823 } 824 } 825 for _, f := range docPkg.Funcs { 826 addIdent(FuncDecl, f.Name, f.Doc) 827 } 828 } 829 830 func (x *Indexer) indexGoFile(dirname string, filename string, file *token.File, astFile *ast.File) { 831 pkgName := astFile.Name.Name 832 833 if x.c.IndexGoCode { 834 x.current = file 835 pak := x.lookupPackage(dirname, pkgName) 836 x.file = &File{filename, pak} 837 ast.Walk(x, astFile) 838 } 839 840 if x.c.IndexDocs { 841 // Test files are already filtered out in visitFile if IndexGoCode and 842 // IndexFullText are false. Otherwise, check here. 843 isTestFile := (x.c.IndexGoCode || x.c.IndexFullText) && 844 (strings.HasSuffix(filename, "_test.go") || strings.HasPrefix(dirname, "/test/")) 845 if !isTestFile { 846 x.indexDocs(dirname, filename, astFile) 847 } 848 } 849 850 ppKey := x.intern(pkgName) 851 if _, ok := x.packagePath[ppKey]; !ok { 852 x.packagePath[ppKey] = make(map[string]bool) 853 } 854 pkgPath := x.intern(strings.TrimPrefix(strings.TrimPrefix(dirname, "/src/"), "pkg/")) 855 x.packagePath[ppKey][pkgPath] = true 856 857 // Merge in exported symbols found walking this file into 858 // the map for that package. 859 if len(x.curPkgExports) > 0 { 860 dest, ok := x.exports[pkgPath] 861 if !ok { 862 dest = make(map[string]SpotKind) 863 x.exports[pkgPath] = dest 864 } 865 for k, v := range x.curPkgExports { 866 dest[k] = v 867 } 868 } 869 } 870 871 func (x *Indexer) visitFile(dirname string, fi os.FileInfo) { 872 if fi.IsDir() || !x.c.IndexEnabled { 873 return 874 } 875 876 filename := pathpkg.Join(dirname, fi.Name()) 877 goFile := isGoFile(fi) 878 879 switch { 880 case x.c.IndexFullText: 881 if !isWhitelisted(fi.Name()) { 882 return 883 } 884 case x.c.IndexGoCode: 885 if !goFile { 886 return 887 } 888 case x.c.IndexDocs: 889 if !goFile || 890 strings.HasSuffix(fi.Name(), "_test.go") || 891 strings.HasPrefix(dirname, "/test/") { 892 return 893 } 894 default: 895 // No indexing turned on. 896 return 897 } 898 899 x.fsOpenGate <- true 900 defer func() { <-x.fsOpenGate }() 901 902 // open file 903 f, err := x.c.fs.Open(filename) 904 if err != nil { 905 return 906 } 907 908 x.mu.Lock() 909 defer x.mu.Unlock() 910 911 x.throttle.Throttle() 912 913 x.curPkgExports = make(map[string]SpotKind) 914 file, fast := x.addFile(f, filename, goFile) 915 if file == nil { 916 return // addFile failed 917 } 918 919 if fast != nil { 920 x.indexGoFile(dirname, fi.Name(), file, fast) 921 } 922 923 // update statistics 924 x.stats.Bytes += file.Size() 925 x.stats.Files++ 926 x.stats.Lines += file.LineCount() 927 } 928 929 // indexOptions contains information that affects the contents of an index. 930 type indexOptions struct { 931 // Docs provides documentation search results. 932 // It is only consulted if IndexEnabled is true. 933 // The default values is true. 934 Docs bool 935 936 // GoCode provides Go source code search results. 937 // It is only consulted if IndexEnabled is true. 938 // The default values is true. 939 GoCode bool 940 941 // FullText provides search results from all files. 942 // It is only consulted if IndexEnabled is true. 943 // The default values is true. 944 FullText bool 945 946 // MaxResults optionally specifies the maximum results for indexing. 947 // The default is 1000. 948 MaxResults int 949 } 950 951 // ---------------------------------------------------------------------------- 952 // Index 953 954 type LookupResult struct { 955 Decls HitList // package-level declarations (with snippets) 956 Others HitList // all other occurrences 957 } 958 959 type Index struct { 960 fset *token.FileSet // file set used during indexing; nil if no textindex 961 suffixes *suffixarray.Index // suffixes for concatenated sources; nil if no textindex 962 words map[string]*LookupResult // maps words to hit lists 963 alts map[string]*AltWords // maps canonical(words) to lists of alternative spellings 964 snippets []*Snippet // all snippets, indexed by snippet index 965 stats Statistics 966 importCount map[string]int // package path ("net/http") => count 967 packagePath map[string]map[string]bool // "template" => "text/template" => true 968 exports map[string]map[string]SpotKind // "net/http" => "ListenAndServe" => FuncDecl 969 idents map[SpotKind]map[string][]Ident 970 opts indexOptions 971 } 972 973 func canonical(w string) string { return strings.ToLower(w) } 974 975 // Somewhat arbitrary, but I figure low enough to not hurt disk-based filesystems 976 // consuming file descriptors, where some systems have low 256 or 512 limits. 977 // Go should have a built-in way to cap fd usage under the ulimit. 978 const ( 979 maxOpenFiles = 200 980 maxOpenDirs = 50 981 ) 982 983 func (c *Corpus) throttle() float64 { 984 if c.IndexThrottle <= 0 { 985 return 0.9 986 } 987 if c.IndexThrottle > 1.0 { 988 return 1.0 989 } 990 return c.IndexThrottle 991 } 992 993 // NewIndex creates a new index for the .go files provided by the corpus. 994 func (c *Corpus) NewIndex() *Index { 995 // initialize Indexer 996 // (use some reasonably sized maps to start) 997 x := &Indexer{ 998 c: c, 999 fset: token.NewFileSet(), 1000 fsOpenGate: make(chan bool, maxOpenFiles), 1001 strings: make(map[string]string), 1002 packages: make(map[Pak]*Pak, 256), 1003 words: make(map[string]*IndexResult, 8192), 1004 throttle: util.NewThrottle(c.throttle(), 100*time.Millisecond), // run at least 0.1s at a time 1005 importCount: make(map[string]int), 1006 packagePath: make(map[string]map[string]bool), 1007 exports: make(map[string]map[string]SpotKind), 1008 idents: make(map[SpotKind]map[string][]Ident, 4), 1009 } 1010 1011 // index all files in the directories given by dirnames 1012 var wg sync.WaitGroup // outstanding ReadDir + visitFile 1013 dirGate := make(chan bool, maxOpenDirs) 1014 for dirname := range c.fsDirnames() { 1015 if c.IndexDirectory != nil && !c.IndexDirectory(dirname) { 1016 continue 1017 } 1018 dirGate <- true 1019 wg.Add(1) 1020 go func(dirname string) { 1021 defer func() { <-dirGate }() 1022 defer wg.Done() 1023 1024 list, err := c.fs.ReadDir(dirname) 1025 if err != nil { 1026 log.Printf("ReadDir(%q): %v; skipping directory", dirname, err) 1027 return // ignore this directory 1028 } 1029 for _, fi := range list { 1030 wg.Add(1) 1031 go func(fi os.FileInfo) { 1032 defer wg.Done() 1033 x.visitFile(dirname, fi) 1034 }(fi) 1035 } 1036 }(dirname) 1037 } 1038 wg.Wait() 1039 1040 if !c.IndexFullText { 1041 // the file set, the current file, and the sources are 1042 // not needed after indexing if no text index is built - 1043 // help GC and clear them 1044 x.fset = nil 1045 x.sources.Reset() 1046 x.current = nil // contains reference to fset! 1047 } 1048 1049 // for each word, reduce the RunLists into a LookupResult; 1050 // also collect the word with its canonical spelling in a 1051 // word list for later computation of alternative spellings 1052 words := make(map[string]*LookupResult) 1053 var wlist RunList 1054 for w, h := range x.words { 1055 decls := reduce(h.Decls) 1056 others := reduce(h.Others) 1057 words[w] = &LookupResult{ 1058 Decls: decls, 1059 Others: others, 1060 } 1061 wlist = append(wlist, &wordPair{canonical(w), w}) 1062 x.throttle.Throttle() 1063 } 1064 x.stats.Words = len(words) 1065 1066 // reduce the word list {canonical(w), w} into 1067 // a list of AltWords runs {canonical(w), {w}} 1068 alist := wlist.reduce(lessWordPair, newAltWords) 1069 1070 // convert alist into a map of alternative spellings 1071 alts := make(map[string]*AltWords) 1072 for i := 0; i < len(alist); i++ { 1073 a := alist[i].(*AltWords) 1074 alts[a.Canon] = a 1075 } 1076 1077 // create text index 1078 var suffixes *suffixarray.Index 1079 if c.IndexFullText { 1080 suffixes = suffixarray.New(x.sources.Bytes()) 1081 } 1082 1083 // sort idents by the number of imports of their respective packages 1084 for _, idMap := range x.idents { 1085 for _, ir := range idMap { 1086 sort.Sort(byImportCount{ir, x.importCount}) 1087 } 1088 } 1089 1090 return &Index{ 1091 fset: x.fset, 1092 suffixes: suffixes, 1093 words: words, 1094 alts: alts, 1095 snippets: x.snippets, 1096 stats: x.stats, 1097 importCount: x.importCount, 1098 packagePath: x.packagePath, 1099 exports: x.exports, 1100 idents: x.idents, 1101 opts: indexOptions{ 1102 Docs: x.c.IndexDocs, 1103 GoCode: x.c.IndexGoCode, 1104 FullText: x.c.IndexFullText, 1105 MaxResults: x.c.MaxResults, 1106 }, 1107 } 1108 } 1109 1110 var ErrFileIndexVersion = errors.New("file index version out of date") 1111 1112 const fileIndexVersion = 3 1113 1114 // fileIndex is the subset of Index that's gob-encoded for use by 1115 // Index.Write and Index.Read. 1116 type fileIndex struct { 1117 Version int 1118 Words map[string]*LookupResult 1119 Alts map[string]*AltWords 1120 Snippets []*Snippet 1121 Fulltext bool 1122 Stats Statistics 1123 ImportCount map[string]int 1124 PackagePath map[string]map[string]bool 1125 Exports map[string]map[string]SpotKind 1126 Idents map[SpotKind]map[string][]Ident 1127 Opts indexOptions 1128 } 1129 1130 func (x *fileIndex) Write(w io.Writer) error { 1131 return gob.NewEncoder(w).Encode(x) 1132 } 1133 1134 func (x *fileIndex) Read(r io.Reader) error { 1135 return gob.NewDecoder(r).Decode(x) 1136 } 1137 1138 // WriteTo writes the index x to w. 1139 func (x *Index) WriteTo(w io.Writer) (n int64, err error) { 1140 w = countingWriter{&n, w} 1141 fulltext := false 1142 if x.suffixes != nil { 1143 fulltext = true 1144 } 1145 fx := fileIndex{ 1146 Version: fileIndexVersion, 1147 Words: x.words, 1148 Alts: x.alts, 1149 Snippets: x.snippets, 1150 Fulltext: fulltext, 1151 Stats: x.stats, 1152 ImportCount: x.importCount, 1153 PackagePath: x.packagePath, 1154 Exports: x.exports, 1155 Idents: x.idents, 1156 Opts: x.opts, 1157 } 1158 if err := fx.Write(w); err != nil { 1159 return 0, err 1160 } 1161 if fulltext { 1162 encode := func(x interface{}) error { 1163 return gob.NewEncoder(w).Encode(x) 1164 } 1165 if err := x.fset.Write(encode); err != nil { 1166 return 0, err 1167 } 1168 if err := x.suffixes.Write(w); err != nil { 1169 return 0, err 1170 } 1171 } 1172 return n, nil 1173 } 1174 1175 // ReadFrom reads the index from r into x; x must not be nil. 1176 // If r does not also implement io.ByteReader, it will be wrapped in a bufio.Reader. 1177 // If the index is from an old version, the error is ErrFileIndexVersion. 1178 func (x *Index) ReadFrom(r io.Reader) (n int64, err error) { 1179 // We use the ability to read bytes as a plausible surrogate for buffering. 1180 if _, ok := r.(io.ByteReader); !ok { 1181 r = bufio.NewReader(r) 1182 } 1183 r = countingReader{&n, r.(byteReader)} 1184 var fx fileIndex 1185 if err := fx.Read(r); err != nil { 1186 return n, err 1187 } 1188 if fx.Version != fileIndexVersion { 1189 return 0, ErrFileIndexVersion 1190 } 1191 x.words = fx.Words 1192 x.alts = fx.Alts 1193 x.snippets = fx.Snippets 1194 x.stats = fx.Stats 1195 x.importCount = fx.ImportCount 1196 x.packagePath = fx.PackagePath 1197 x.exports = fx.Exports 1198 x.idents = fx.Idents 1199 x.opts = fx.Opts 1200 if fx.Fulltext { 1201 x.fset = token.NewFileSet() 1202 decode := func(x interface{}) error { 1203 return gob.NewDecoder(r).Decode(x) 1204 } 1205 if err := x.fset.Read(decode); err != nil { 1206 return n, err 1207 } 1208 x.suffixes = new(suffixarray.Index) 1209 if err := x.suffixes.Read(r); err != nil { 1210 return n, err 1211 } 1212 } 1213 return n, nil 1214 } 1215 1216 // Stats returns index statistics. 1217 func (x *Index) Stats() Statistics { 1218 return x.stats 1219 } 1220 1221 // ImportCount returns a map from import paths to how many times they were seen. 1222 func (x *Index) ImportCount() map[string]int { 1223 return x.importCount 1224 } 1225 1226 // PackagePath returns a map from short package name to a set 1227 // of full package path names that use that short package name. 1228 func (x *Index) PackagePath() map[string]map[string]bool { 1229 return x.packagePath 1230 } 1231 1232 // Exports returns a map from full package path to exported 1233 // symbol name to its type. 1234 func (x *Index) Exports() map[string]map[string]SpotKind { 1235 return x.exports 1236 } 1237 1238 // Idents returns a map from identifier type to exported 1239 // symbol name to the list of identifiers matching that name. 1240 func (x *Index) Idents() map[SpotKind]map[string][]Ident { 1241 return x.idents 1242 } 1243 1244 func (x *Index) lookupWord(w string) (match *LookupResult, alt *AltWords) { 1245 match = x.words[w] 1246 alt = x.alts[canonical(w)] 1247 // remove current spelling from alternatives 1248 // (if there is no match, the alternatives do 1249 // not contain the current spelling) 1250 if match != nil && alt != nil { 1251 alt = alt.filter(w) 1252 } 1253 return 1254 } 1255 1256 // isIdentifier reports whether s is a Go identifier. 1257 func isIdentifier(s string) bool { 1258 for i, ch := range s { 1259 if unicode.IsLetter(ch) || ch == '_' || i > 0 && unicode.IsDigit(ch) { 1260 continue 1261 } 1262 return false 1263 } 1264 return len(s) > 0 1265 } 1266 1267 // For a given query, which is either a single identifier or a qualified 1268 // identifier, Lookup returns a SearchResult containing packages, a LookupResult, a 1269 // list of alternative spellings, and identifiers, if any. Any and all results 1270 // may be nil. If the query syntax is wrong, an error is reported. 1271 func (x *Index) Lookup(query string) (*SearchResult, error) { 1272 ss := strings.Split(query, ".") 1273 1274 // check query syntax 1275 for _, s := range ss { 1276 if !isIdentifier(s) { 1277 return nil, errors.New("all query parts must be identifiers") 1278 } 1279 } 1280 rslt := &SearchResult{ 1281 Query: query, 1282 Idents: make(map[SpotKind][]Ident, 5), 1283 } 1284 // handle simple and qualified identifiers 1285 switch len(ss) { 1286 case 1: 1287 ident := ss[0] 1288 rslt.Hit, rslt.Alt = x.lookupWord(ident) 1289 if rslt.Hit != nil { 1290 // found a match - filter packages with same name 1291 // for the list of packages called ident, if any 1292 rslt.Pak = rslt.Hit.Others.filter(ident) 1293 } 1294 for k, v := range x.idents { 1295 const rsltLimit = 50 1296 ids := byImportCount{v[ident], x.importCount} 1297 rslt.Idents[k] = ids.top(rsltLimit) 1298 } 1299 1300 case 2: 1301 pakname, ident := ss[0], ss[1] 1302 rslt.Hit, rslt.Alt = x.lookupWord(ident) 1303 if rslt.Hit != nil { 1304 // found a match - filter by package name 1305 // (no paks - package names are not qualified) 1306 decls := rslt.Hit.Decls.filter(pakname) 1307 others := rslt.Hit.Others.filter(pakname) 1308 rslt.Hit = &LookupResult{decls, others} 1309 } 1310 for k, v := range x.idents { 1311 ids := byImportCount{v[ident], x.importCount} 1312 rslt.Idents[k] = ids.filter(pakname) 1313 } 1314 1315 default: 1316 return nil, errors.New("query is not a (qualified) identifier") 1317 } 1318 1319 return rslt, nil 1320 } 1321 1322 func (x *Index) Snippet(i int) *Snippet { 1323 // handle illegal snippet indices gracefully 1324 if 0 <= i && i < len(x.snippets) { 1325 return x.snippets[i] 1326 } 1327 return nil 1328 } 1329 1330 type positionList []struct { 1331 filename string 1332 line int 1333 } 1334 1335 func (list positionList) Len() int { return len(list) } 1336 func (list positionList) Less(i, j int) bool { return list[i].filename < list[j].filename } 1337 func (list positionList) Swap(i, j int) { list[i], list[j] = list[j], list[i] } 1338 1339 // unique returns the list sorted and with duplicate entries removed 1340 func unique(list []int) []int { 1341 sort.Ints(list) 1342 var last int 1343 i := 0 1344 for _, x := range list { 1345 if i == 0 || x != last { 1346 last = x 1347 list[i] = x 1348 i++ 1349 } 1350 } 1351 return list[0:i] 1352 } 1353 1354 // A FileLines value specifies a file and line numbers within that file. 1355 type FileLines struct { 1356 Filename string 1357 Lines []int 1358 } 1359 1360 // LookupRegexp returns the number of matches and the matches where a regular 1361 // expression r is found in the full text index. At most n matches are 1362 // returned (thus found <= n). 1363 func (x *Index) LookupRegexp(r *regexp.Regexp, n int) (found int, result []FileLines) { 1364 if x.suffixes == nil || n <= 0 { 1365 return 1366 } 1367 // n > 0 1368 1369 var list positionList 1370 // FindAllIndex may returns matches that span across file boundaries. 1371 // Such matches are unlikely, buf after eliminating them we may end up 1372 // with fewer than n matches. If we don't have enough at the end, redo 1373 // the search with an increased value n1, but only if FindAllIndex 1374 // returned all the requested matches in the first place (if it 1375 // returned fewer than that there cannot be more). 1376 for n1 := n; found < n; n1 += n - found { 1377 found = 0 1378 matches := x.suffixes.FindAllIndex(r, n1) 1379 // compute files, exclude matches that span file boundaries, 1380 // and map offsets to file-local offsets 1381 list = make(positionList, len(matches)) 1382 for _, m := range matches { 1383 // by construction, an offset corresponds to the Pos value 1384 // for the file set - use it to get the file and line 1385 p := token.Pos(m[0]) 1386 if file := x.fset.File(p); file != nil { 1387 if base := file.Base(); base <= m[1] && m[1] <= base+file.Size() { 1388 // match [m[0], m[1]) is within the file boundaries 1389 list[found].filename = file.Name() 1390 list[found].line = file.Line(p) 1391 found++ 1392 } 1393 } 1394 } 1395 if found == n || len(matches) < n1 { 1396 // found all matches or there's no chance to find more 1397 break 1398 } 1399 } 1400 list = list[0:found] 1401 sort.Sort(list) // sort by filename 1402 1403 // collect matches belonging to the same file 1404 var last string 1405 var lines []int 1406 addLines := func() { 1407 if len(lines) > 0 { 1408 // remove duplicate lines 1409 result = append(result, FileLines{last, unique(lines)}) 1410 lines = nil 1411 } 1412 } 1413 for _, m := range list { 1414 if m.filename != last { 1415 addLines() 1416 last = m.filename 1417 } 1418 lines = append(lines, m.line) 1419 } 1420 addLines() 1421 1422 return 1423 } 1424 1425 // invalidateIndex should be called whenever any of the file systems 1426 // under godoc's observation change so that the indexer is kicked on. 1427 func (c *Corpus) invalidateIndex() { 1428 c.fsModified.Set(nil) 1429 c.refreshMetadata() 1430 } 1431 1432 // feedDirnames feeds the directory names of all directories 1433 // under the file system given by root to channel c. 1434 func (c *Corpus) feedDirnames(ch chan<- string) { 1435 if dir, _ := c.fsTree.Get(); dir != nil { 1436 for d := range dir.(*Directory).iter(false) { 1437 ch <- d.Path 1438 } 1439 } 1440 } 1441 1442 // fsDirnames() returns a channel sending all directory names 1443 // of all the file systems under godoc's observation. 1444 func (c *Corpus) fsDirnames() <-chan string { 1445 ch := make(chan string, 256) // buffered for fewer context switches 1446 go func() { 1447 c.feedDirnames(ch) 1448 close(ch) 1449 }() 1450 return ch 1451 } 1452 1453 // CompatibleWith reports whether the Index x is compatible with the corpus 1454 // indexing options set in c. 1455 func (x *Index) CompatibleWith(c *Corpus) bool { 1456 return x.opts.Docs == c.IndexDocs && 1457 x.opts.GoCode == c.IndexGoCode && 1458 x.opts.FullText == c.IndexFullText && 1459 x.opts.MaxResults == c.MaxResults 1460 } 1461 1462 func (c *Corpus) readIndex(filenames string) error { 1463 matches, err := filepath.Glob(filenames) 1464 if err != nil { 1465 return err 1466 } else if matches == nil { 1467 return fmt.Errorf("no index files match %q", filenames) 1468 } 1469 sort.Strings(matches) // make sure files are in the right order 1470 files := make([]io.Reader, 0, len(matches)) 1471 for _, filename := range matches { 1472 f, err := os.Open(filename) 1473 if err != nil { 1474 return err 1475 } 1476 defer f.Close() 1477 files = append(files, f) 1478 } 1479 return c.ReadIndexFrom(io.MultiReader(files...)) 1480 } 1481 1482 // ReadIndexFrom sets the current index from the serialized version found in r. 1483 func (c *Corpus) ReadIndexFrom(r io.Reader) error { 1484 x := new(Index) 1485 if _, err := x.ReadFrom(r); err != nil { 1486 return err 1487 } 1488 if !x.CompatibleWith(c) { 1489 return fmt.Errorf("index file options are incompatible: %v", x.opts) 1490 } 1491 c.searchIndex.Set(x) 1492 return nil 1493 } 1494 1495 func (c *Corpus) UpdateIndex() { 1496 if c.Verbose { 1497 log.Printf("updating index...") 1498 } 1499 start := time.Now() 1500 index := c.NewIndex() 1501 stop := time.Now() 1502 c.searchIndex.Set(index) 1503 if c.Verbose { 1504 secs := stop.Sub(start).Seconds() 1505 stats := index.Stats() 1506 log.Printf("index updated (%gs, %d bytes of source, %d files, %d lines, %d unique words, %d spots)", 1507 secs, stats.Bytes, stats.Files, stats.Lines, stats.Words, stats.Spots) 1508 } 1509 memstats := new(runtime.MemStats) 1510 runtime.ReadMemStats(memstats) 1511 if c.Verbose { 1512 log.Printf("before GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys) 1513 } 1514 runtime.GC() 1515 runtime.ReadMemStats(memstats) 1516 if c.Verbose { 1517 log.Printf("after GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys) 1518 } 1519 } 1520 1521 // RunIndexer runs forever, indexing. 1522 func (c *Corpus) RunIndexer() { 1523 // initialize the index from disk if possible 1524 if c.IndexFiles != "" { 1525 c.initFSTree() 1526 if err := c.readIndex(c.IndexFiles); err != nil { 1527 log.Printf("error reading index from file %s: %v", c.IndexFiles, err) 1528 } 1529 return 1530 } 1531 1532 // Repeatedly update the package directory tree and index. 1533 for { 1534 c.initFSTree() 1535 c.UpdateIndex() 1536 if c.IndexInterval < 0 { 1537 return 1538 } 1539 delay := 5 * time.Minute // by default, reindex every 5 minutes 1540 if c.IndexInterval > 0 { 1541 delay = c.IndexInterval 1542 } 1543 time.Sleep(delay) 1544 } 1545 } 1546 1547 type countingWriter struct { 1548 n *int64 1549 w io.Writer 1550 } 1551 1552 func (c countingWriter) Write(p []byte) (n int, err error) { 1553 n, err = c.w.Write(p) 1554 *c.n += int64(n) 1555 return 1556 } 1557 1558 type byteReader interface { 1559 io.Reader 1560 io.ByteReader 1561 } 1562 1563 type countingReader struct { 1564 n *int64 1565 r byteReader 1566 } 1567 1568 func (c countingReader) Read(p []byte) (n int, err error) { 1569 n, err = c.r.Read(p) 1570 *c.n += int64(n) 1571 return 1572 } 1573 1574 func (c countingReader) ReadByte() (b byte, err error) { 1575 b, err = c.r.ReadByte() 1576 *c.n += 1 1577 return 1578 }