github.com/v2fly/tools@v0.100.0/godoc/index.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // This file contains the infrastructure to create an 6 // identifier and full-text index for a set of Go files. 7 // 8 // Algorithm for identifier index: 9 // - traverse all .go files of the file tree specified by root 10 // - for each identifier (word) encountered, collect all occurrences (spots) 11 // into a list; this produces a list of spots for each word 12 // - reduce the lists: from a list of spots to a list of FileRuns, 13 // and from a list of FileRuns into a list of PakRuns 14 // - make a HitList from the PakRuns 15 // 16 // Details: 17 // - keep two lists per word: one containing package-level declarations 18 // that have snippets, and one containing all other spots 19 // - keep the snippets in a separate table indexed by snippet index 20 // and store the snippet index in place of the line number in a SpotInfo 21 // (the line number for spots with snippets is stored in the snippet) 22 // - at the end, create lists of alternative spellings for a given 23 // word 24 // 25 // Algorithm for full text index: 26 // - concatenate all source code in a byte buffer (in memory) 27 // - add the files to a file set in lockstep as they are added to the byte 28 // buffer such that a byte buffer offset corresponds to the Pos value for 29 // that file location 30 // - create a suffix array from the concatenated sources 31 // 32 // String lookup in full text index: 33 // - use the suffix array to lookup a string's offsets - the offsets 34 // correspond to the Pos values relative to the file set 35 // - translate the Pos values back into file and line information and 36 // sort the result 37 38 package godoc 39 40 import ( 41 "bufio" 42 "bytes" 43 "encoding/gob" 44 "errors" 45 "fmt" 46 "go/ast" 47 "go/doc" 48 "go/parser" 49 "go/token" 50 "index/suffixarray" 51 "io" 52 "log" 53 "os" 54 pathpkg "path" 55 "path/filepath" 56 "regexp" 57 "runtime" 58 "sort" 59 "strconv" 60 "strings" 61 "sync" 62 "time" 63 "unicode" 64 65 "github.com/v2fly/tools/godoc/util" 66 "github.com/v2fly/tools/godoc/vfs" 67 ) 68 69 // ---------------------------------------------------------------------------- 70 // InterfaceSlice is a helper type for sorting interface 71 // slices according to some slice-specific sort criteria. 72 73 type comparer func(x, y interface{}) bool 74 75 type interfaceSlice struct { 76 slice []interface{} 77 less comparer 78 } 79 80 // ---------------------------------------------------------------------------- 81 // RunList 82 83 // A RunList is a list of entries that can be sorted according to some 84 // criteria. A RunList may be compressed by grouping "runs" of entries 85 // which are equal (according to the sort criteria) into a new RunList of 86 // runs. For instance, a RunList containing pairs (x, y) may be compressed 87 // into a RunList containing pair runs (x, {y}) where each run consists of 88 // a list of y's with the same x. 89 type RunList []interface{} 90 91 func (h RunList) sort(less comparer) { 92 sort.Sort(&interfaceSlice{h, less}) 93 } 94 95 func (p *interfaceSlice) Len() int { return len(p.slice) } 96 func (p *interfaceSlice) Less(i, j int) bool { return p.less(p.slice[i], p.slice[j]) } 97 func (p *interfaceSlice) Swap(i, j int) { p.slice[i], p.slice[j] = p.slice[j], p.slice[i] } 98 99 // Compress entries which are the same according to a sort criteria 100 // (specified by less) into "runs". 101 func (h RunList) reduce(less comparer, newRun func(h RunList) interface{}) RunList { 102 if len(h) == 0 { 103 return nil 104 } 105 // len(h) > 0 106 107 // create runs of entries with equal values 108 h.sort(less) 109 110 // for each run, make a new run object and collect them in a new RunList 111 var hh RunList 112 i, x := 0, h[0] 113 for j, y := range h { 114 if less(x, y) { 115 hh = append(hh, newRun(h[i:j])) 116 i, x = j, h[j] // start a new run 117 } 118 } 119 // add final run, if any 120 if i < len(h) { 121 hh = append(hh, newRun(h[i:])) 122 } 123 124 return hh 125 } 126 127 // ---------------------------------------------------------------------------- 128 // KindRun 129 130 // Debugging support. Disable to see multiple entries per line. 131 const removeDuplicates = true 132 133 // A KindRun is a run of SpotInfos of the same kind in a given file. 134 // The kind (3 bits) is stored in each SpotInfo element; to find the 135 // kind of a KindRun, look at any of its elements. 136 type KindRun []SpotInfo 137 138 // KindRuns are sorted by line number or index. Since the isIndex bit 139 // is always the same for all infos in one list we can compare lori's. 140 func (k KindRun) Len() int { return len(k) } 141 func (k KindRun) Less(i, j int) bool { return k[i].Lori() < k[j].Lori() } 142 func (k KindRun) Swap(i, j int) { k[i], k[j] = k[j], k[i] } 143 144 // FileRun contents are sorted by Kind for the reduction into KindRuns. 145 func lessKind(x, y interface{}) bool { return x.(SpotInfo).Kind() < y.(SpotInfo).Kind() } 146 147 // newKindRun allocates a new KindRun from the SpotInfo run h. 148 func newKindRun(h RunList) interface{} { 149 run := make(KindRun, len(h)) 150 for i, x := range h { 151 run[i] = x.(SpotInfo) 152 } 153 154 // Spots were sorted by file and kind to create this run. 155 // Within this run, sort them by line number or index. 156 sort.Sort(run) 157 158 if removeDuplicates { 159 // Since both the lori and kind field must be 160 // same for duplicates, and since the isIndex 161 // bit is always the same for all infos in one 162 // list we can simply compare the entire info. 163 k := 0 164 prev := SpotInfo(1<<32 - 1) // an unlikely value 165 for _, x := range run { 166 if x != prev { 167 run[k] = x 168 k++ 169 prev = x 170 } 171 } 172 run = run[0:k] 173 } 174 175 return run 176 } 177 178 // ---------------------------------------------------------------------------- 179 // FileRun 180 181 // A Pak describes a Go package. 182 type Pak struct { 183 Path string // path of directory containing the package 184 Name string // package name as declared by package clause 185 } 186 187 // Paks are sorted by name (primary key) and by import path (secondary key). 188 func (p *Pak) less(q *Pak) bool { 189 return p.Name < q.Name || p.Name == q.Name && p.Path < q.Path 190 } 191 192 // A File describes a Go file. 193 type File struct { 194 Name string // directory-local file name 195 Pak *Pak // the package to which the file belongs 196 } 197 198 // Path returns the file path of f. 199 func (f *File) Path() string { 200 return pathpkg.Join(f.Pak.Path, f.Name) 201 } 202 203 // A Spot describes a single occurrence of a word. 204 type Spot struct { 205 File *File 206 Info SpotInfo 207 } 208 209 // A FileRun is a list of KindRuns belonging to the same file. 210 type FileRun struct { 211 File *File 212 Groups []KindRun 213 } 214 215 // Spots are sorted by file path for the reduction into FileRuns. 216 func lessSpot(x, y interface{}) bool { 217 fx := x.(Spot).File 218 fy := y.(Spot).File 219 // same as "return fx.Path() < fy.Path()" but w/o computing the file path first 220 px := fx.Pak.Path 221 py := fy.Pak.Path 222 return px < py || px == py && fx.Name < fy.Name 223 } 224 225 // newFileRun allocates a new FileRun from the Spot run h. 226 func newFileRun(h RunList) interface{} { 227 file := h[0].(Spot).File 228 229 // reduce the list of Spots into a list of KindRuns 230 h1 := make(RunList, len(h)) 231 for i, x := range h { 232 h1[i] = x.(Spot).Info 233 } 234 h2 := h1.reduce(lessKind, newKindRun) 235 236 // create the FileRun 237 groups := make([]KindRun, len(h2)) 238 for i, x := range h2 { 239 groups[i] = x.(KindRun) 240 } 241 return &FileRun{file, groups} 242 } 243 244 // ---------------------------------------------------------------------------- 245 // PakRun 246 247 // A PakRun describes a run of *FileRuns of a package. 248 type PakRun struct { 249 Pak *Pak 250 Files []*FileRun 251 } 252 253 // Sorting support for files within a PakRun. 254 func (p *PakRun) Len() int { return len(p.Files) } 255 func (p *PakRun) Less(i, j int) bool { return p.Files[i].File.Name < p.Files[j].File.Name } 256 func (p *PakRun) Swap(i, j int) { p.Files[i], p.Files[j] = p.Files[j], p.Files[i] } 257 258 // FileRuns are sorted by package for the reduction into PakRuns. 259 func lessFileRun(x, y interface{}) bool { 260 return x.(*FileRun).File.Pak.less(y.(*FileRun).File.Pak) 261 } 262 263 // newPakRun allocates a new PakRun from the *FileRun run h. 264 func newPakRun(h RunList) interface{} { 265 pak := h[0].(*FileRun).File.Pak 266 files := make([]*FileRun, len(h)) 267 for i, x := range h { 268 files[i] = x.(*FileRun) 269 } 270 run := &PakRun{pak, files} 271 sort.Sort(run) // files were sorted by package; sort them by file now 272 return run 273 } 274 275 // ---------------------------------------------------------------------------- 276 // HitList 277 278 // A HitList describes a list of PakRuns. 279 type HitList []*PakRun 280 281 // PakRuns are sorted by package. 282 func lessPakRun(x, y interface{}) bool { return x.(*PakRun).Pak.less(y.(*PakRun).Pak) } 283 284 func reduce(h0 RunList) HitList { 285 // reduce a list of Spots into a list of FileRuns 286 h1 := h0.reduce(lessSpot, newFileRun) 287 // reduce a list of FileRuns into a list of PakRuns 288 h2 := h1.reduce(lessFileRun, newPakRun) 289 // sort the list of PakRuns by package 290 h2.sort(lessPakRun) 291 // create a HitList 292 h := make(HitList, len(h2)) 293 for i, p := range h2 { 294 h[i] = p.(*PakRun) 295 } 296 return h 297 } 298 299 // filter returns a new HitList created by filtering 300 // all PakRuns from h that have a matching pakname. 301 func (h HitList) filter(pakname string) HitList { 302 var hh HitList 303 for _, p := range h { 304 if p.Pak.Name == pakname { 305 hh = append(hh, p) 306 } 307 } 308 return hh 309 } 310 311 // ---------------------------------------------------------------------------- 312 // AltWords 313 314 type wordPair struct { 315 canon string // canonical word spelling (all lowercase) 316 alt string // alternative spelling 317 } 318 319 // An AltWords describes a list of alternative spellings for a 320 // canonical (all lowercase) spelling of a word. 321 type AltWords struct { 322 Canon string // canonical word spelling (all lowercase) 323 Alts []string // alternative spelling for the same word 324 } 325 326 // wordPairs are sorted by their canonical spelling. 327 func lessWordPair(x, y interface{}) bool { return x.(*wordPair).canon < y.(*wordPair).canon } 328 329 // newAltWords allocates a new AltWords from the *wordPair run h. 330 func newAltWords(h RunList) interface{} { 331 canon := h[0].(*wordPair).canon 332 alts := make([]string, len(h)) 333 for i, x := range h { 334 alts[i] = x.(*wordPair).alt 335 } 336 return &AltWords{canon, alts} 337 } 338 339 func (a *AltWords) filter(s string) *AltWords { 340 var alts []string 341 for _, w := range a.Alts { 342 if w != s { 343 alts = append(alts, w) 344 } 345 } 346 if len(alts) > 0 { 347 return &AltWords{a.Canon, alts} 348 } 349 return nil 350 } 351 352 // Ident stores information about external identifiers in order to create 353 // links to package documentation. 354 type Ident struct { 355 Path string // e.g. "net/http" 356 Package string // e.g. "http" 357 Name string // e.g. "NewRequest" 358 Doc string // e.g. "NewRequest returns a new Request..." 359 } 360 361 // byImportCount sorts the given slice of Idents by the import 362 // counts of the packages to which they belong. 363 type byImportCount struct { 364 Idents []Ident 365 ImportCount map[string]int 366 } 367 368 func (ic byImportCount) Len() int { 369 return len(ic.Idents) 370 } 371 372 func (ic byImportCount) Less(i, j int) bool { 373 ri := ic.ImportCount[ic.Idents[i].Path] 374 rj := ic.ImportCount[ic.Idents[j].Path] 375 if ri == rj { 376 return ic.Idents[i].Path < ic.Idents[j].Path 377 } 378 return ri > rj 379 } 380 381 func (ic byImportCount) Swap(i, j int) { 382 ic.Idents[i], ic.Idents[j] = ic.Idents[j], ic.Idents[i] 383 } 384 385 func (ic byImportCount) String() string { 386 buf := bytes.NewBuffer([]byte("[")) 387 for _, v := range ic.Idents { 388 buf.WriteString(fmt.Sprintf("\n\t%s, %s (%d)", v.Path, v.Name, ic.ImportCount[v.Path])) 389 } 390 buf.WriteString("\n]") 391 return buf.String() 392 } 393 394 // filter creates a new Ident list where the results match the given 395 // package name. 396 func (ic byImportCount) filter(pakname string) []Ident { 397 if ic.Idents == nil { 398 return nil 399 } 400 var res []Ident 401 for _, i := range ic.Idents { 402 if i.Package == pakname { 403 res = append(res, i) 404 } 405 } 406 return res 407 } 408 409 // top returns the top n identifiers. 410 func (ic byImportCount) top(n int) []Ident { 411 if len(ic.Idents) > n { 412 return ic.Idents[:n] 413 } 414 return ic.Idents 415 } 416 417 // ---------------------------------------------------------------------------- 418 // Indexer 419 420 type IndexResult struct { 421 Decls RunList // package-level declarations (with snippets) 422 Others RunList // all other occurrences 423 } 424 425 // Statistics provides statistics information for an index. 426 type Statistics struct { 427 Bytes int // total size of indexed source files 428 Files int // number of indexed source files 429 Lines int // number of lines (all files) 430 Words int // number of different identifiers 431 Spots int // number of identifier occurrences 432 } 433 434 // An Indexer maintains the data structures and provides the machinery 435 // for indexing .go files under a file tree. It implements the path.Visitor 436 // interface for walking file trees, and the ast.Visitor interface for 437 // walking Go ASTs. 438 type Indexer struct { 439 c *Corpus 440 fset *token.FileSet // file set for all indexed files 441 fsOpenGate chan bool // send pre fs.Open; receive on close 442 443 mu sync.Mutex // guards all the following 444 sources bytes.Buffer // concatenated sources 445 strings map[string]string // interned string 446 packages map[Pak]*Pak // interned *Paks 447 words map[string]*IndexResult // RunLists of Spots 448 snippets []*Snippet // indices are stored in SpotInfos 449 current *token.File // last file added to file set 450 file *File // AST for current file 451 decl ast.Decl // AST for current decl 452 stats Statistics 453 throttle *util.Throttle 454 importCount map[string]int // package path ("net/http") => count 455 packagePath map[string]map[string]bool // "template" => "text/template" => true 456 exports map[string]map[string]SpotKind // "net/http" => "ListenAndServe" => FuncDecl 457 curPkgExports map[string]SpotKind 458 idents map[SpotKind]map[string][]Ident // kind => name => list of Idents 459 } 460 461 func (x *Indexer) intern(s string) string { 462 if s, ok := x.strings[s]; ok { 463 return s 464 } 465 x.strings[s] = s 466 return s 467 } 468 469 func (x *Indexer) lookupPackage(path, name string) *Pak { 470 // In the source directory tree, more than one package may 471 // live in the same directory. For the packages map, construct 472 // a key that includes both the directory path and the package 473 // name. 474 key := Pak{Path: x.intern(path), Name: x.intern(name)} 475 pak := x.packages[key] 476 if pak == nil { 477 pak = &key 478 x.packages[key] = pak 479 } 480 return pak 481 } 482 483 func (x *Indexer) addSnippet(s *Snippet) int { 484 index := len(x.snippets) 485 x.snippets = append(x.snippets, s) 486 return index 487 } 488 489 func (x *Indexer) visitIdent(kind SpotKind, id *ast.Ident) { 490 if id == nil { 491 return 492 } 493 name := x.intern(id.Name) 494 495 switch kind { 496 case TypeDecl, FuncDecl, ConstDecl, VarDecl: 497 x.curPkgExports[name] = kind 498 } 499 500 lists, found := x.words[name] 501 if !found { 502 lists = new(IndexResult) 503 x.words[name] = lists 504 } 505 506 if kind == Use || x.decl == nil { 507 if x.c.IndexGoCode { 508 // not a declaration or no snippet required 509 info := makeSpotInfo(kind, x.current.Line(id.Pos()), false) 510 lists.Others = append(lists.Others, Spot{x.file, info}) 511 } 512 } else { 513 // a declaration with snippet 514 index := x.addSnippet(NewSnippet(x.fset, x.decl, id)) 515 info := makeSpotInfo(kind, index, true) 516 lists.Decls = append(lists.Decls, Spot{x.file, info}) 517 } 518 519 x.stats.Spots++ 520 } 521 522 func (x *Indexer) visitFieldList(kind SpotKind, flist *ast.FieldList) { 523 for _, f := range flist.List { 524 x.decl = nil // no snippets for fields 525 for _, name := range f.Names { 526 x.visitIdent(kind, name) 527 } 528 ast.Walk(x, f.Type) 529 // ignore tag - not indexed at the moment 530 } 531 } 532 533 func (x *Indexer) visitSpec(kind SpotKind, spec ast.Spec) { 534 switch n := spec.(type) { 535 case *ast.ImportSpec: 536 x.visitIdent(ImportDecl, n.Name) 537 if n.Path != nil { 538 if imp, err := strconv.Unquote(n.Path.Value); err == nil { 539 x.importCount[x.intern(imp)]++ 540 } 541 } 542 543 case *ast.ValueSpec: 544 for _, n := range n.Names { 545 x.visitIdent(kind, n) 546 } 547 ast.Walk(x, n.Type) 548 for _, v := range n.Values { 549 ast.Walk(x, v) 550 } 551 552 case *ast.TypeSpec: 553 x.visitIdent(TypeDecl, n.Name) 554 ast.Walk(x, n.Type) 555 } 556 } 557 558 func (x *Indexer) visitGenDecl(decl *ast.GenDecl) { 559 kind := VarDecl 560 if decl.Tok == token.CONST { 561 kind = ConstDecl 562 } 563 x.decl = decl 564 for _, s := range decl.Specs { 565 x.visitSpec(kind, s) 566 } 567 } 568 569 func (x *Indexer) Visit(node ast.Node) ast.Visitor { 570 switch n := node.(type) { 571 case nil: 572 // nothing to do 573 574 case *ast.Ident: 575 x.visitIdent(Use, n) 576 577 case *ast.FieldList: 578 x.visitFieldList(VarDecl, n) 579 580 case *ast.InterfaceType: 581 x.visitFieldList(MethodDecl, n.Methods) 582 583 case *ast.DeclStmt: 584 // local declarations should only be *ast.GenDecls; 585 // ignore incorrect ASTs 586 if decl, ok := n.Decl.(*ast.GenDecl); ok { 587 x.decl = nil // no snippets for local declarations 588 x.visitGenDecl(decl) 589 } 590 591 case *ast.GenDecl: 592 x.decl = n 593 x.visitGenDecl(n) 594 595 case *ast.FuncDecl: 596 kind := FuncDecl 597 if n.Recv != nil { 598 kind = MethodDecl 599 ast.Walk(x, n.Recv) 600 } 601 x.decl = n 602 x.visitIdent(kind, n.Name) 603 ast.Walk(x, n.Type) 604 if n.Body != nil { 605 ast.Walk(x, n.Body) 606 } 607 608 case *ast.File: 609 x.decl = nil 610 x.visitIdent(PackageClause, n.Name) 611 for _, d := range n.Decls { 612 ast.Walk(x, d) 613 } 614 615 default: 616 return x 617 } 618 619 return nil 620 } 621 622 // addFile adds a file to the index if possible and returns the file set file 623 // and the file's AST if it was successfully parsed as a Go file. If addFile 624 // failed (that is, if the file was not added), it returns file == nil. 625 func (x *Indexer) addFile(f vfs.ReadSeekCloser, filename string, goFile bool) (file *token.File, ast *ast.File) { 626 defer f.Close() 627 628 // The file set's base offset and x.sources size must be in lock-step; 629 // this permits the direct mapping of suffix array lookup results to 630 // to corresponding Pos values. 631 // 632 // When a file is added to the file set, its offset base increases by 633 // the size of the file + 1; and the initial base offset is 1. Add an 634 // extra byte to the sources here. 635 x.sources.WriteByte(0) 636 637 // If the sources length doesn't match the file set base at this point 638 // the file set implementation changed or we have another error. 639 base := x.fset.Base() 640 if x.sources.Len() != base { 641 panic("internal error: file base incorrect") 642 } 643 644 // append file contents (src) to x.sources 645 if _, err := x.sources.ReadFrom(f); err == nil { 646 src := x.sources.Bytes()[base:] 647 648 if goFile { 649 // parse the file and in the process add it to the file set 650 if ast, err = parser.ParseFile(x.fset, filename, src, parser.ParseComments); err == nil { 651 file = x.fset.File(ast.Pos()) // ast.Pos() is inside the file 652 return 653 } 654 // file has parse errors, and the AST may be incorrect - 655 // set lines information explicitly and index as ordinary 656 // text file (cannot fall through to the text case below 657 // because the file has already been added to the file set 658 // by the parser) 659 file = x.fset.File(token.Pos(base)) // token.Pos(base) is inside the file 660 file.SetLinesForContent(src) 661 ast = nil 662 return 663 } 664 665 if util.IsText(src) { 666 // only add the file to the file set (for the full text index) 667 file = x.fset.AddFile(filename, x.fset.Base(), len(src)) 668 file.SetLinesForContent(src) 669 return 670 } 671 } 672 673 // discard possibly added data 674 x.sources.Truncate(base - 1) // -1 to remove added byte 0 since no file was added 675 return 676 } 677 678 // Design note: Using an explicit white list of permitted files for indexing 679 // makes sure that the important files are included and massively reduces the 680 // number of files to index. The advantage over a blacklist is that unexpected 681 // (non-blacklisted) files won't suddenly explode the index. 682 683 // Files are whitelisted if they have a file name or extension 684 // present as key in whitelisted. 685 var whitelisted = map[string]bool{ 686 ".bash": true, 687 ".c": true, 688 ".cc": true, 689 ".cpp": true, 690 ".cxx": true, 691 ".css": true, 692 ".go": true, 693 ".goc": true, 694 ".h": true, 695 ".hh": true, 696 ".hpp": true, 697 ".hxx": true, 698 ".html": true, 699 ".js": true, 700 ".out": true, 701 ".py": true, 702 ".s": true, 703 ".sh": true, 704 ".txt": true, 705 ".xml": true, 706 "AUTHORS": true, 707 "CONTRIBUTORS": true, 708 "LICENSE": true, 709 "Makefile": true, 710 "PATENTS": true, 711 "README": true, 712 } 713 714 // isWhitelisted returns true if a file is on the list 715 // of "permitted" files for indexing. The filename must 716 // be the directory-local name of the file. 717 func isWhitelisted(filename string) bool { 718 key := pathpkg.Ext(filename) 719 if key == "" { 720 // file has no extension - use entire filename 721 key = filename 722 } 723 return whitelisted[key] 724 } 725 726 func (x *Indexer) indexDocs(dirname string, filename string, astFile *ast.File) { 727 pkgName := x.intern(astFile.Name.Name) 728 if pkgName == "main" { 729 return 730 } 731 pkgPath := x.intern(strings.TrimPrefix(strings.TrimPrefix(dirname, "/src/"), "pkg/")) 732 astPkg := ast.Package{ 733 Name: pkgName, 734 Files: map[string]*ast.File{ 735 filename: astFile, 736 }, 737 } 738 var m doc.Mode 739 docPkg := doc.New(&astPkg, dirname, m) 740 addIdent := func(sk SpotKind, name string, docstr string) { 741 if x.idents[sk] == nil { 742 x.idents[sk] = make(map[string][]Ident) 743 } 744 name = x.intern(name) 745 x.idents[sk][name] = append(x.idents[sk][name], Ident{ 746 Path: pkgPath, 747 Package: pkgName, 748 Name: name, 749 Doc: doc.Synopsis(docstr), 750 }) 751 } 752 753 if x.idents[PackageClause] == nil { 754 x.idents[PackageClause] = make(map[string][]Ident) 755 } 756 // List of words under which the package identifier will be stored. 757 // This includes the package name and the components of the directory 758 // in which it resides. 759 words := strings.Split(pathpkg.Dir(pkgPath), "/") 760 if words[0] == "." { 761 words = []string{} 762 } 763 name := x.intern(docPkg.Name) 764 synopsis := doc.Synopsis(docPkg.Doc) 765 words = append(words, name) 766 pkgIdent := Ident{ 767 Path: pkgPath, 768 Package: pkgName, 769 Name: name, 770 Doc: synopsis, 771 } 772 for _, word := range words { 773 word = x.intern(word) 774 found := false 775 pkgs := x.idents[PackageClause][word] 776 for i, p := range pkgs { 777 if p.Path == pkgPath { 778 if docPkg.Doc != "" { 779 p.Doc = synopsis 780 pkgs[i] = p 781 } 782 found = true 783 break 784 } 785 } 786 if !found { 787 x.idents[PackageClause][word] = append(x.idents[PackageClause][word], pkgIdent) 788 } 789 } 790 791 for _, c := range docPkg.Consts { 792 for _, name := range c.Names { 793 addIdent(ConstDecl, name, c.Doc) 794 } 795 } 796 for _, t := range docPkg.Types { 797 addIdent(TypeDecl, t.Name, t.Doc) 798 for _, c := range t.Consts { 799 for _, name := range c.Names { 800 addIdent(ConstDecl, name, c.Doc) 801 } 802 } 803 for _, v := range t.Vars { 804 for _, name := range v.Names { 805 addIdent(VarDecl, name, v.Doc) 806 } 807 } 808 for _, f := range t.Funcs { 809 addIdent(FuncDecl, f.Name, f.Doc) 810 } 811 for _, f := range t.Methods { 812 addIdent(MethodDecl, f.Name, f.Doc) 813 // Change the name of methods to be "<typename>.<methodname>". 814 // They will still be indexed as <methodname>. 815 idents := x.idents[MethodDecl][f.Name] 816 idents[len(idents)-1].Name = x.intern(t.Name + "." + f.Name) 817 } 818 } 819 for _, v := range docPkg.Vars { 820 for _, name := range v.Names { 821 addIdent(VarDecl, name, v.Doc) 822 } 823 } 824 for _, f := range docPkg.Funcs { 825 addIdent(FuncDecl, f.Name, f.Doc) 826 } 827 } 828 829 func (x *Indexer) indexGoFile(dirname string, filename string, file *token.File, astFile *ast.File) { 830 pkgName := astFile.Name.Name 831 832 if x.c.IndexGoCode { 833 x.current = file 834 pak := x.lookupPackage(dirname, pkgName) 835 x.file = &File{filename, pak} 836 ast.Walk(x, astFile) 837 } 838 839 if x.c.IndexDocs { 840 // Test files are already filtered out in visitFile if IndexGoCode and 841 // IndexFullText are false. Otherwise, check here. 842 isTestFile := (x.c.IndexGoCode || x.c.IndexFullText) && 843 (strings.HasSuffix(filename, "_test.go") || strings.HasPrefix(dirname, "/test/")) 844 if !isTestFile { 845 x.indexDocs(dirname, filename, astFile) 846 } 847 } 848 849 ppKey := x.intern(pkgName) 850 if _, ok := x.packagePath[ppKey]; !ok { 851 x.packagePath[ppKey] = make(map[string]bool) 852 } 853 pkgPath := x.intern(strings.TrimPrefix(strings.TrimPrefix(dirname, "/src/"), "pkg/")) 854 x.packagePath[ppKey][pkgPath] = true 855 856 // Merge in exported symbols found walking this file into 857 // the map for that package. 858 if len(x.curPkgExports) > 0 { 859 dest, ok := x.exports[pkgPath] 860 if !ok { 861 dest = make(map[string]SpotKind) 862 x.exports[pkgPath] = dest 863 } 864 for k, v := range x.curPkgExports { 865 dest[k] = v 866 } 867 } 868 } 869 870 func (x *Indexer) visitFile(dirname string, fi os.FileInfo) { 871 if fi.IsDir() || !x.c.IndexEnabled { 872 return 873 } 874 875 filename := pathpkg.Join(dirname, fi.Name()) 876 goFile := isGoFile(fi) 877 878 switch { 879 case x.c.IndexFullText: 880 if !isWhitelisted(fi.Name()) { 881 return 882 } 883 case x.c.IndexGoCode: 884 if !goFile { 885 return 886 } 887 case x.c.IndexDocs: 888 if !goFile || 889 strings.HasSuffix(fi.Name(), "_test.go") || 890 strings.HasPrefix(dirname, "/test/") { 891 return 892 } 893 default: 894 // No indexing turned on. 895 return 896 } 897 898 x.fsOpenGate <- true 899 defer func() { <-x.fsOpenGate }() 900 901 // open file 902 f, err := x.c.fs.Open(filename) 903 if err != nil { 904 return 905 } 906 907 x.mu.Lock() 908 defer x.mu.Unlock() 909 910 x.throttle.Throttle() 911 912 x.curPkgExports = make(map[string]SpotKind) 913 file, fast := x.addFile(f, filename, goFile) 914 if file == nil { 915 return // addFile failed 916 } 917 918 if fast != nil { 919 x.indexGoFile(dirname, fi.Name(), file, fast) 920 } 921 922 // update statistics 923 x.stats.Bytes += file.Size() 924 x.stats.Files++ 925 x.stats.Lines += file.LineCount() 926 } 927 928 // indexOptions contains information that affects the contents of an index. 929 type indexOptions struct { 930 // Docs provides documentation search results. 931 // It is only consulted if IndexEnabled is true. 932 // The default values is true. 933 Docs bool 934 935 // GoCode provides Go source code search results. 936 // It is only consulted if IndexEnabled is true. 937 // The default values is true. 938 GoCode bool 939 940 // FullText provides search results from all files. 941 // It is only consulted if IndexEnabled is true. 942 // The default values is true. 943 FullText bool 944 945 // MaxResults optionally specifies the maximum results for indexing. 946 // The default is 1000. 947 MaxResults int 948 } 949 950 // ---------------------------------------------------------------------------- 951 // Index 952 953 type LookupResult struct { 954 Decls HitList // package-level declarations (with snippets) 955 Others HitList // all other occurrences 956 } 957 958 type Index struct { 959 fset *token.FileSet // file set used during indexing; nil if no textindex 960 suffixes *suffixarray.Index // suffixes for concatenated sources; nil if no textindex 961 words map[string]*LookupResult // maps words to hit lists 962 alts map[string]*AltWords // maps canonical(words) to lists of alternative spellings 963 snippets []*Snippet // all snippets, indexed by snippet index 964 stats Statistics 965 importCount map[string]int // package path ("net/http") => count 966 packagePath map[string]map[string]bool // "template" => "text/template" => true 967 exports map[string]map[string]SpotKind // "net/http" => "ListenAndServe" => FuncDecl 968 idents map[SpotKind]map[string][]Ident 969 opts indexOptions 970 } 971 972 func canonical(w string) string { return strings.ToLower(w) } 973 974 // Somewhat arbitrary, but I figure low enough to not hurt disk-based filesystems 975 // consuming file descriptors, where some systems have low 256 or 512 limits. 976 // Go should have a built-in way to cap fd usage under the ulimit. 977 const ( 978 maxOpenFiles = 200 979 maxOpenDirs = 50 980 ) 981 982 func (c *Corpus) throttle() float64 { 983 if c.IndexThrottle <= 0 { 984 return 0.9 985 } 986 if c.IndexThrottle > 1.0 { 987 return 1.0 988 } 989 return c.IndexThrottle 990 } 991 992 // NewIndex creates a new index for the .go files provided by the corpus. 993 func (c *Corpus) NewIndex() *Index { 994 // initialize Indexer 995 // (use some reasonably sized maps to start) 996 x := &Indexer{ 997 c: c, 998 fset: token.NewFileSet(), 999 fsOpenGate: make(chan bool, maxOpenFiles), 1000 strings: make(map[string]string), 1001 packages: make(map[Pak]*Pak, 256), 1002 words: make(map[string]*IndexResult, 8192), 1003 throttle: util.NewThrottle(c.throttle(), 100*time.Millisecond), // run at least 0.1s at a time 1004 importCount: make(map[string]int), 1005 packagePath: make(map[string]map[string]bool), 1006 exports: make(map[string]map[string]SpotKind), 1007 idents: make(map[SpotKind]map[string][]Ident, 4), 1008 } 1009 1010 // index all files in the directories given by dirnames 1011 var wg sync.WaitGroup // outstanding ReadDir + visitFile 1012 dirGate := make(chan bool, maxOpenDirs) 1013 for dirname := range c.fsDirnames() { 1014 if c.IndexDirectory != nil && !c.IndexDirectory(dirname) { 1015 continue 1016 } 1017 dirGate <- true 1018 wg.Add(1) 1019 go func(dirname string) { 1020 defer func() { <-dirGate }() 1021 defer wg.Done() 1022 1023 list, err := c.fs.ReadDir(dirname) 1024 if err != nil { 1025 log.Printf("ReadDir(%q): %v; skipping directory", dirname, err) 1026 return // ignore this directory 1027 } 1028 for _, fi := range list { 1029 wg.Add(1) 1030 go func(fi os.FileInfo) { 1031 defer wg.Done() 1032 x.visitFile(dirname, fi) 1033 }(fi) 1034 } 1035 }(dirname) 1036 } 1037 wg.Wait() 1038 1039 if !c.IndexFullText { 1040 // the file set, the current file, and the sources are 1041 // not needed after indexing if no text index is built - 1042 // help GC and clear them 1043 x.fset = nil 1044 x.sources.Reset() 1045 x.current = nil // contains reference to fset! 1046 } 1047 1048 // for each word, reduce the RunLists into a LookupResult; 1049 // also collect the word with its canonical spelling in a 1050 // word list for later computation of alternative spellings 1051 words := make(map[string]*LookupResult) 1052 var wlist RunList 1053 for w, h := range x.words { 1054 decls := reduce(h.Decls) 1055 others := reduce(h.Others) 1056 words[w] = &LookupResult{ 1057 Decls: decls, 1058 Others: others, 1059 } 1060 wlist = append(wlist, &wordPair{canonical(w), w}) 1061 x.throttle.Throttle() 1062 } 1063 x.stats.Words = len(words) 1064 1065 // reduce the word list {canonical(w), w} into 1066 // a list of AltWords runs {canonical(w), {w}} 1067 alist := wlist.reduce(lessWordPair, newAltWords) 1068 1069 // convert alist into a map of alternative spellings 1070 alts := make(map[string]*AltWords) 1071 for i := 0; i < len(alist); i++ { 1072 a := alist[i].(*AltWords) 1073 alts[a.Canon] = a 1074 } 1075 1076 // create text index 1077 var suffixes *suffixarray.Index 1078 if c.IndexFullText { 1079 suffixes = suffixarray.New(x.sources.Bytes()) 1080 } 1081 1082 // sort idents by the number of imports of their respective packages 1083 for _, idMap := range x.idents { 1084 for _, ir := range idMap { 1085 sort.Sort(byImportCount{ir, x.importCount}) 1086 } 1087 } 1088 1089 return &Index{ 1090 fset: x.fset, 1091 suffixes: suffixes, 1092 words: words, 1093 alts: alts, 1094 snippets: x.snippets, 1095 stats: x.stats, 1096 importCount: x.importCount, 1097 packagePath: x.packagePath, 1098 exports: x.exports, 1099 idents: x.idents, 1100 opts: indexOptions{ 1101 Docs: x.c.IndexDocs, 1102 GoCode: x.c.IndexGoCode, 1103 FullText: x.c.IndexFullText, 1104 MaxResults: x.c.MaxResults, 1105 }, 1106 } 1107 } 1108 1109 var ErrFileIndexVersion = errors.New("file index version out of date") 1110 1111 const fileIndexVersion = 3 1112 1113 // fileIndex is the subset of Index that's gob-encoded for use by 1114 // Index.Write and Index.Read. 1115 type fileIndex struct { 1116 Version int 1117 Words map[string]*LookupResult 1118 Alts map[string]*AltWords 1119 Snippets []*Snippet 1120 Fulltext bool 1121 Stats Statistics 1122 ImportCount map[string]int 1123 PackagePath map[string]map[string]bool 1124 Exports map[string]map[string]SpotKind 1125 Idents map[SpotKind]map[string][]Ident 1126 Opts indexOptions 1127 } 1128 1129 func (x *fileIndex) Write(w io.Writer) error { 1130 return gob.NewEncoder(w).Encode(x) 1131 } 1132 1133 func (x *fileIndex) Read(r io.Reader) error { 1134 return gob.NewDecoder(r).Decode(x) 1135 } 1136 1137 // WriteTo writes the index x to w. 1138 func (x *Index) WriteTo(w io.Writer) (n int64, err error) { 1139 w = countingWriter{&n, w} 1140 fulltext := false 1141 if x.suffixes != nil { 1142 fulltext = true 1143 } 1144 fx := fileIndex{ 1145 Version: fileIndexVersion, 1146 Words: x.words, 1147 Alts: x.alts, 1148 Snippets: x.snippets, 1149 Fulltext: fulltext, 1150 Stats: x.stats, 1151 ImportCount: x.importCount, 1152 PackagePath: x.packagePath, 1153 Exports: x.exports, 1154 Idents: x.idents, 1155 Opts: x.opts, 1156 } 1157 if err := fx.Write(w); err != nil { 1158 return 0, err 1159 } 1160 if fulltext { 1161 encode := func(x interface{}) error { 1162 return gob.NewEncoder(w).Encode(x) 1163 } 1164 if err := x.fset.Write(encode); err != nil { 1165 return 0, err 1166 } 1167 if err := x.suffixes.Write(w); err != nil { 1168 return 0, err 1169 } 1170 } 1171 return n, nil 1172 } 1173 1174 // ReadFrom reads the index from r into x; x must not be nil. 1175 // If r does not also implement io.ByteReader, it will be wrapped in a bufio.Reader. 1176 // If the index is from an old version, the error is ErrFileIndexVersion. 1177 func (x *Index) ReadFrom(r io.Reader) (n int64, err error) { 1178 // We use the ability to read bytes as a plausible surrogate for buffering. 1179 if _, ok := r.(io.ByteReader); !ok { 1180 r = bufio.NewReader(r) 1181 } 1182 r = countingReader{&n, r.(byteReader)} 1183 var fx fileIndex 1184 if err := fx.Read(r); err != nil { 1185 return n, err 1186 } 1187 if fx.Version != fileIndexVersion { 1188 return 0, ErrFileIndexVersion 1189 } 1190 x.words = fx.Words 1191 x.alts = fx.Alts 1192 x.snippets = fx.Snippets 1193 x.stats = fx.Stats 1194 x.importCount = fx.ImportCount 1195 x.packagePath = fx.PackagePath 1196 x.exports = fx.Exports 1197 x.idents = fx.Idents 1198 x.opts = fx.Opts 1199 if fx.Fulltext { 1200 x.fset = token.NewFileSet() 1201 decode := func(x interface{}) error { 1202 return gob.NewDecoder(r).Decode(x) 1203 } 1204 if err := x.fset.Read(decode); err != nil { 1205 return n, err 1206 } 1207 x.suffixes = new(suffixarray.Index) 1208 if err := x.suffixes.Read(r); err != nil { 1209 return n, err 1210 } 1211 } 1212 return n, nil 1213 } 1214 1215 // Stats returns index statistics. 1216 func (x *Index) Stats() Statistics { 1217 return x.stats 1218 } 1219 1220 // ImportCount returns a map from import paths to how many times they were seen. 1221 func (x *Index) ImportCount() map[string]int { 1222 return x.importCount 1223 } 1224 1225 // PackagePath returns a map from short package name to a set 1226 // of full package path names that use that short package name. 1227 func (x *Index) PackagePath() map[string]map[string]bool { 1228 return x.packagePath 1229 } 1230 1231 // Exports returns a map from full package path to exported 1232 // symbol name to its type. 1233 func (x *Index) Exports() map[string]map[string]SpotKind { 1234 return x.exports 1235 } 1236 1237 // Idents returns a map from identifier type to exported 1238 // symbol name to the list of identifiers matching that name. 1239 func (x *Index) Idents() map[SpotKind]map[string][]Ident { 1240 return x.idents 1241 } 1242 1243 func (x *Index) lookupWord(w string) (match *LookupResult, alt *AltWords) { 1244 match = x.words[w] 1245 alt = x.alts[canonical(w)] 1246 // remove current spelling from alternatives 1247 // (if there is no match, the alternatives do 1248 // not contain the current spelling) 1249 if match != nil && alt != nil { 1250 alt = alt.filter(w) 1251 } 1252 return 1253 } 1254 1255 // isIdentifier reports whether s is a Go identifier. 1256 func isIdentifier(s string) bool { 1257 for i, ch := range s { 1258 if unicode.IsLetter(ch) || ch == '_' || i > 0 && unicode.IsDigit(ch) { 1259 continue 1260 } 1261 return false 1262 } 1263 return len(s) > 0 1264 } 1265 1266 // For a given query, which is either a single identifier or a qualified 1267 // identifier, Lookup returns a SearchResult containing packages, a LookupResult, a 1268 // list of alternative spellings, and identifiers, if any. Any and all results 1269 // may be nil. If the query syntax is wrong, an error is reported. 1270 func (x *Index) Lookup(query string) (*SearchResult, error) { 1271 ss := strings.Split(query, ".") 1272 1273 // check query syntax 1274 for _, s := range ss { 1275 if !isIdentifier(s) { 1276 return nil, errors.New("all query parts must be identifiers") 1277 } 1278 } 1279 rslt := &SearchResult{ 1280 Query: query, 1281 Idents: make(map[SpotKind][]Ident, 5), 1282 } 1283 // handle simple and qualified identifiers 1284 switch len(ss) { 1285 case 1: 1286 ident := ss[0] 1287 rslt.Hit, rslt.Alt = x.lookupWord(ident) 1288 if rslt.Hit != nil { 1289 // found a match - filter packages with same name 1290 // for the list of packages called ident, if any 1291 rslt.Pak = rslt.Hit.Others.filter(ident) 1292 } 1293 for k, v := range x.idents { 1294 const rsltLimit = 50 1295 ids := byImportCount{v[ident], x.importCount} 1296 rslt.Idents[k] = ids.top(rsltLimit) 1297 } 1298 1299 case 2: 1300 pakname, ident := ss[0], ss[1] 1301 rslt.Hit, rslt.Alt = x.lookupWord(ident) 1302 if rslt.Hit != nil { 1303 // found a match - filter by package name 1304 // (no paks - package names are not qualified) 1305 decls := rslt.Hit.Decls.filter(pakname) 1306 others := rslt.Hit.Others.filter(pakname) 1307 rslt.Hit = &LookupResult{decls, others} 1308 } 1309 for k, v := range x.idents { 1310 ids := byImportCount{v[ident], x.importCount} 1311 rslt.Idents[k] = ids.filter(pakname) 1312 } 1313 1314 default: 1315 return nil, errors.New("query is not a (qualified) identifier") 1316 } 1317 1318 return rslt, nil 1319 } 1320 1321 func (x *Index) Snippet(i int) *Snippet { 1322 // handle illegal snippet indices gracefully 1323 if 0 <= i && i < len(x.snippets) { 1324 return x.snippets[i] 1325 } 1326 return nil 1327 } 1328 1329 type positionList []struct { 1330 filename string 1331 line int 1332 } 1333 1334 func (list positionList) Len() int { return len(list) } 1335 func (list positionList) Less(i, j int) bool { return list[i].filename < list[j].filename } 1336 func (list positionList) Swap(i, j int) { list[i], list[j] = list[j], list[i] } 1337 1338 // unique returns the list sorted and with duplicate entries removed 1339 func unique(list []int) []int { 1340 sort.Ints(list) 1341 var last int 1342 i := 0 1343 for _, x := range list { 1344 if i == 0 || x != last { 1345 last = x 1346 list[i] = x 1347 i++ 1348 } 1349 } 1350 return list[0:i] 1351 } 1352 1353 // A FileLines value specifies a file and line numbers within that file. 1354 type FileLines struct { 1355 Filename string 1356 Lines []int 1357 } 1358 1359 // LookupRegexp returns the number of matches and the matches where a regular 1360 // expression r is found in the full text index. At most n matches are 1361 // returned (thus found <= n). 1362 // 1363 func (x *Index) LookupRegexp(r *regexp.Regexp, n int) (found int, result []FileLines) { 1364 if x.suffixes == nil || n <= 0 { 1365 return 1366 } 1367 // n > 0 1368 1369 var list positionList 1370 // FindAllIndex may returns matches that span across file boundaries. 1371 // Such matches are unlikely, buf after eliminating them we may end up 1372 // with fewer than n matches. If we don't have enough at the end, redo 1373 // the search with an increased value n1, but only if FindAllIndex 1374 // returned all the requested matches in the first place (if it 1375 // returned fewer than that there cannot be more). 1376 for n1 := n; found < n; n1 += n - found { 1377 found = 0 1378 matches := x.suffixes.FindAllIndex(r, n1) 1379 // compute files, exclude matches that span file boundaries, 1380 // and map offsets to file-local offsets 1381 list = make(positionList, len(matches)) 1382 for _, m := range matches { 1383 // by construction, an offset corresponds to the Pos value 1384 // for the file set - use it to get the file and line 1385 p := token.Pos(m[0]) 1386 if file := x.fset.File(p); file != nil { 1387 if base := file.Base(); base <= m[1] && m[1] <= base+file.Size() { 1388 // match [m[0], m[1]) is within the file boundaries 1389 list[found].filename = file.Name() 1390 list[found].line = file.Line(p) 1391 found++ 1392 } 1393 } 1394 } 1395 if found == n || len(matches) < n1 { 1396 // found all matches or there's no chance to find more 1397 break 1398 } 1399 } 1400 list = list[0:found] 1401 sort.Sort(list) // sort by filename 1402 1403 // collect matches belonging to the same file 1404 var last string 1405 var lines []int 1406 addLines := func() { 1407 if len(lines) > 0 { 1408 // remove duplicate lines 1409 result = append(result, FileLines{last, unique(lines)}) 1410 lines = nil 1411 } 1412 } 1413 for _, m := range list { 1414 if m.filename != last { 1415 addLines() 1416 last = m.filename 1417 } 1418 lines = append(lines, m.line) 1419 } 1420 addLines() 1421 1422 return 1423 } 1424 1425 // InvalidateIndex should be called whenever any of the file systems 1426 // under godoc's observation change so that the indexer is kicked on. 1427 func (c *Corpus) invalidateIndex() { 1428 c.fsModified.Set(nil) 1429 c.refreshMetadata() 1430 } 1431 1432 // feedDirnames feeds the directory names of all directories 1433 // under the file system given by root to channel c. 1434 // 1435 func (c *Corpus) feedDirnames(ch chan<- string) { 1436 if dir, _ := c.fsTree.Get(); dir != nil { 1437 for d := range dir.(*Directory).iter(false) { 1438 ch <- d.Path 1439 } 1440 } 1441 } 1442 1443 // fsDirnames() returns a channel sending all directory names 1444 // of all the file systems under godoc's observation. 1445 // 1446 func (c *Corpus) fsDirnames() <-chan string { 1447 ch := make(chan string, 256) // buffered for fewer context switches 1448 go func() { 1449 c.feedDirnames(ch) 1450 close(ch) 1451 }() 1452 return ch 1453 } 1454 1455 // CompatibleWith reports whether the Index x is compatible with the corpus 1456 // indexing options set in c. 1457 func (x *Index) CompatibleWith(c *Corpus) bool { 1458 return x.opts.Docs == c.IndexDocs && 1459 x.opts.GoCode == c.IndexGoCode && 1460 x.opts.FullText == c.IndexFullText && 1461 x.opts.MaxResults == c.MaxResults 1462 } 1463 1464 func (c *Corpus) readIndex(filenames string) error { 1465 matches, err := filepath.Glob(filenames) 1466 if err != nil { 1467 return err 1468 } else if matches == nil { 1469 return fmt.Errorf("no index files match %q", filenames) 1470 } 1471 sort.Strings(matches) // make sure files are in the right order 1472 files := make([]io.Reader, 0, len(matches)) 1473 for _, filename := range matches { 1474 f, err := os.Open(filename) 1475 if err != nil { 1476 return err 1477 } 1478 defer f.Close() 1479 files = append(files, f) 1480 } 1481 return c.ReadIndexFrom(io.MultiReader(files...)) 1482 } 1483 1484 // ReadIndexFrom sets the current index from the serialized version found in r. 1485 func (c *Corpus) ReadIndexFrom(r io.Reader) error { 1486 x := new(Index) 1487 if _, err := x.ReadFrom(r); err != nil { 1488 return err 1489 } 1490 if !x.CompatibleWith(c) { 1491 return fmt.Errorf("index file options are incompatible: %v", x.opts) 1492 } 1493 c.searchIndex.Set(x) 1494 return nil 1495 } 1496 1497 func (c *Corpus) UpdateIndex() { 1498 if c.Verbose { 1499 log.Printf("updating index...") 1500 } 1501 start := time.Now() 1502 index := c.NewIndex() 1503 stop := time.Now() 1504 c.searchIndex.Set(index) 1505 if c.Verbose { 1506 secs := stop.Sub(start).Seconds() 1507 stats := index.Stats() 1508 log.Printf("index updated (%gs, %d bytes of source, %d files, %d lines, %d unique words, %d spots)", 1509 secs, stats.Bytes, stats.Files, stats.Lines, stats.Words, stats.Spots) 1510 } 1511 memstats := new(runtime.MemStats) 1512 runtime.ReadMemStats(memstats) 1513 if c.Verbose { 1514 log.Printf("before GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys) 1515 } 1516 runtime.GC() 1517 runtime.ReadMemStats(memstats) 1518 if c.Verbose { 1519 log.Printf("after GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys) 1520 } 1521 } 1522 1523 // RunIndexer runs forever, indexing. 1524 func (c *Corpus) RunIndexer() { 1525 // initialize the index from disk if possible 1526 if c.IndexFiles != "" { 1527 c.initFSTree() 1528 if err := c.readIndex(c.IndexFiles); err != nil { 1529 log.Printf("error reading index from file %s: %v", c.IndexFiles, err) 1530 } 1531 return 1532 } 1533 1534 // Repeatedly update the package directory tree and index. 1535 for { 1536 c.initFSTree() 1537 c.UpdateIndex() 1538 if c.IndexInterval < 0 { 1539 return 1540 } 1541 delay := 5 * time.Minute // by default, reindex every 5 minutes 1542 if c.IndexInterval > 0 { 1543 delay = c.IndexInterval 1544 } 1545 time.Sleep(delay) 1546 } 1547 } 1548 1549 type countingWriter struct { 1550 n *int64 1551 w io.Writer 1552 } 1553 1554 func (c countingWriter) Write(p []byte) (n int, err error) { 1555 n, err = c.w.Write(p) 1556 *c.n += int64(n) 1557 return 1558 } 1559 1560 type byteReader interface { 1561 io.Reader 1562 io.ByteReader 1563 } 1564 1565 type countingReader struct { 1566 n *int64 1567 r byteReader 1568 } 1569 1570 func (c countingReader) Read(p []byte) (n int, err error) { 1571 n, err = c.r.Read(p) 1572 *c.n += int64(n) 1573 return 1574 } 1575 1576 func (c countingReader) ReadByte() (b byte, err error) { 1577 b, err = c.r.ReadByte() 1578 *c.n += 1 1579 return 1580 }