github.com/powerman/golang-tools@v0.1.11-0.20220410185822-5ad214d8d803/godoc/index.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This file contains the infrastructure to create an
     6  // identifier and full-text index for a set of Go files.
     7  //
     8  // Algorithm for identifier index:
     9  // - traverse all .go files of the file tree specified by root
    10  // - for each identifier (word) encountered, collect all occurrences (spots)
    11  //   into a list; this produces a list of spots for each word
    12  // - reduce the lists: from a list of spots to a list of FileRuns,
    13  //   and from a list of FileRuns into a list of PakRuns
    14  // - make a HitList from the PakRuns
    15  //
    16  // Details:
    17  // - keep two lists per word: one containing package-level declarations
    18  //   that have snippets, and one containing all other spots
    19  // - keep the snippets in a separate table indexed by snippet index
    20  //   and store the snippet index in place of the line number in a SpotInfo
    21  //   (the line number for spots with snippets is stored in the snippet)
    22  // - at the end, create lists of alternative spellings for a given
    23  //   word
    24  //
    25  // Algorithm for full text index:
    26  // - concatenate all source code in a byte buffer (in memory)
    27  // - add the files to a file set in lockstep as they are added to the byte
    28  //   buffer such that a byte buffer offset corresponds to the Pos value for
    29  //   that file location
    30  // - create a suffix array from the concatenated sources
    31  //
    32  // String lookup in full text index:
    33  // - use the suffix array to lookup a string's offsets - the offsets
    34  //   correspond to the Pos values relative to the file set
    35  // - translate the Pos values back into file and line information and
    36  //   sort the result
    37  
    38  package godoc
    39  
    40  import (
    41  	"bufio"
    42  	"bytes"
    43  	"encoding/gob"
    44  	"errors"
    45  	"fmt"
    46  	"go/ast"
    47  	"go/doc"
    48  	"go/parser"
    49  	"go/token"
    50  	"index/suffixarray"
    51  	"io"
    52  	"log"
    53  	"os"
    54  	pathpkg "path"
    55  	"path/filepath"
    56  	"regexp"
    57  	"runtime"
    58  	"sort"
    59  	"strconv"
    60  	"strings"
    61  	"sync"
    62  	"time"
    63  	"unicode"
    64  
    65  	"github.com/powerman/golang-tools/godoc/util"
    66  	"github.com/powerman/golang-tools/godoc/vfs"
    67  )
    68  
    69  // ----------------------------------------------------------------------------
    70  // InterfaceSlice is a helper type for sorting interface
    71  // slices according to some slice-specific sort criteria.
    72  
// A comparer reports whether x sorts before y. It is the ordering
// callback used to sort and reduce RunLists.
type comparer func(x, y interface{}) bool
    74  
// interfaceSlice pairs a slice of arbitrary values with the comparer
// that orders them, so that the pair can be handed to package sort.
type interfaceSlice struct {
	slice []interface{} // elements to be sorted
	less  comparer      // ordering applied to the elements
}
    79  
    80  // ----------------------------------------------------------------------------
    81  // RunList
    82  
// A RunList is a list of entries that can be sorted according to some
// criteria. A RunList may be compressed by grouping "runs" of entries
// which are equal (according to the sort criteria) into a new RunList of
// runs. For instance, a RunList containing pairs (x, y) may be compressed
// into a RunList containing pair runs (x, {y}) where each run consists of
// a list of y's with the same x.
//
// The element type is interface{}; each comparer and run constructor
// type-asserts the concrete element type it expects.
type RunList []interface{}
    90  
    91  func (h RunList) sort(less comparer) {
    92  	sort.Sort(&interfaceSlice{h, less})
    93  }
    94  
    95  func (p *interfaceSlice) Len() int           { return len(p.slice) }
    96  func (p *interfaceSlice) Less(i, j int) bool { return p.less(p.slice[i], p.slice[j]) }
    97  func (p *interfaceSlice) Swap(i, j int)      { p.slice[i], p.slice[j] = p.slice[j], p.slice[i] }
    98  
    99  // Compress entries which are the same according to a sort criteria
   100  // (specified by less) into "runs".
   101  func (h RunList) reduce(less comparer, newRun func(h RunList) interface{}) RunList {
   102  	if len(h) == 0 {
   103  		return nil
   104  	}
   105  	// len(h) > 0
   106  
   107  	// create runs of entries with equal values
   108  	h.sort(less)
   109  
   110  	// for each run, make a new run object and collect them in a new RunList
   111  	var hh RunList
   112  	i, x := 0, h[0]
   113  	for j, y := range h {
   114  		if less(x, y) {
   115  			hh = append(hh, newRun(h[i:j]))
   116  			i, x = j, h[j] // start a new run
   117  		}
   118  	}
   119  	// add final run, if any
   120  	if i < len(h) {
   121  		hh = append(hh, newRun(h[i:]))
   122  	}
   123  
   124  	return hh
   125  }
   126  
   127  // ----------------------------------------------------------------------------
   128  // KindRun
   129  
// removeDuplicates controls whether newKindRun drops identical
// SpotInfos from a run. It exists for debugging: set it to false to
// see multiple entries per line.
const removeDuplicates = true
   132  
// A KindRun is a run of SpotInfos of the same kind in a given file.
// The kind (3 bits) is stored in each SpotInfo element; to find the
// kind of a KindRun, look at any of its elements.
type KindRun []SpotInfo
   137  
   138  // KindRuns are sorted by line number or index. Since the isIndex bit
   139  // is always the same for all infos in one list we can compare lori's.
   140  func (k KindRun) Len() int           { return len(k) }
   141  func (k KindRun) Less(i, j int) bool { return k[i].Lori() < k[j].Lori() }
   142  func (k KindRun) Swap(i, j int)      { k[i], k[j] = k[j], k[i] }
   143  
   144  // FileRun contents are sorted by Kind for the reduction into KindRuns.
   145  func lessKind(x, y interface{}) bool { return x.(SpotInfo).Kind() < y.(SpotInfo).Kind() }
   146  
   147  // newKindRun allocates a new KindRun from the SpotInfo run h.
   148  func newKindRun(h RunList) interface{} {
   149  	run := make(KindRun, len(h))
   150  	for i, x := range h {
   151  		run[i] = x.(SpotInfo)
   152  	}
   153  
   154  	// Spots were sorted by file and kind to create this run.
   155  	// Within this run, sort them by line number or index.
   156  	sort.Sort(run)
   157  
   158  	if removeDuplicates {
   159  		// Since both the lori and kind field must be
   160  		// same for duplicates, and since the isIndex
   161  		// bit is always the same for all infos in one
   162  		// list we can simply compare the entire info.
   163  		k := 0
   164  		prev := SpotInfo(1<<32 - 1) // an unlikely value
   165  		for _, x := range run {
   166  			if x != prev {
   167  				run[k] = x
   168  				k++
   169  				prev = x
   170  			}
   171  		}
   172  		run = run[0:k]
   173  	}
   174  
   175  	return run
   176  }
   177  
   178  // ----------------------------------------------------------------------------
   179  // FileRun
   180  
// A Pak describes a Go package by the directory that contains it and
// the name declared in its package clause. Pak values are used as map
// keys when interning packages.
type Pak struct {
	Path string // path of directory containing the package
	Name string // package name as declared by package clause
}
   186  
   187  // Paks are sorted by name (primary key) and by import path (secondary key).
   188  func (p *Pak) less(q *Pak) bool {
   189  	return p.Name < q.Name || p.Name == q.Name && p.Path < q.Path
   190  }
   191  
// A File describes a Go file by its name within its directory and the
// package it belongs to.
type File struct {
	Name string // directory-local file name
	Pak  *Pak   // the package to which the file belongs
}
   197  
   198  // Path returns the file path of f.
   199  func (f *File) Path() string {
   200  	return pathpkg.Join(f.Pak.Path, f.Name)
   201  }
   202  
// A Spot describes a single occurrence of a word.
type Spot struct {
	File *File    // file in which the word occurs
	Info SpotInfo // kind of occurrence plus its line number or snippet index
}
   208  
// A FileRun is a list of KindRuns belonging to the same file.
type FileRun struct {
	File   *File     // the file all groups belong to
	Groups []KindRun // one run per kind of occurrence found in File
}
   214  
   215  // Spots are sorted by file path for the reduction into FileRuns.
   216  func lessSpot(x, y interface{}) bool {
   217  	fx := x.(Spot).File
   218  	fy := y.(Spot).File
   219  	// same as "return fx.Path() < fy.Path()" but w/o computing the file path first
   220  	px := fx.Pak.Path
   221  	py := fy.Pak.Path
   222  	return px < py || px == py && fx.Name < fy.Name
   223  }
   224  
   225  // newFileRun allocates a new FileRun from the Spot run h.
   226  func newFileRun(h RunList) interface{} {
   227  	file := h[0].(Spot).File
   228  
   229  	// reduce the list of Spots into a list of KindRuns
   230  	h1 := make(RunList, len(h))
   231  	for i, x := range h {
   232  		h1[i] = x.(Spot).Info
   233  	}
   234  	h2 := h1.reduce(lessKind, newKindRun)
   235  
   236  	// create the FileRun
   237  	groups := make([]KindRun, len(h2))
   238  	for i, x := range h2 {
   239  		groups[i] = x.(KindRun)
   240  	}
   241  	return &FileRun{file, groups}
   242  }
   243  
   244  // ----------------------------------------------------------------------------
   245  // PakRun
   246  
// A PakRun describes a run of *FileRuns of a package.
type PakRun struct {
	Pak   *Pak       // the package all files belong to
	Files []*FileRun // per-file results for Pak
}
   252  
   253  // Sorting support for files within a PakRun.
   254  func (p *PakRun) Len() int           { return len(p.Files) }
   255  func (p *PakRun) Less(i, j int) bool { return p.Files[i].File.Name < p.Files[j].File.Name }
   256  func (p *PakRun) Swap(i, j int)      { p.Files[i], p.Files[j] = p.Files[j], p.Files[i] }
   257  
   258  // FileRuns are sorted by package for the reduction into PakRuns.
   259  func lessFileRun(x, y interface{}) bool {
   260  	return x.(*FileRun).File.Pak.less(y.(*FileRun).File.Pak)
   261  }
   262  
   263  // newPakRun allocates a new PakRun from the *FileRun run h.
   264  func newPakRun(h RunList) interface{} {
   265  	pak := h[0].(*FileRun).File.Pak
   266  	files := make([]*FileRun, len(h))
   267  	for i, x := range h {
   268  		files[i] = x.(*FileRun)
   269  	}
   270  	run := &PakRun{pak, files}
   271  	sort.Sort(run) // files were sorted by package; sort them by file now
   272  	return run
   273  }
   274  
   275  // ----------------------------------------------------------------------------
   276  // HitList
   277  
// A HitList describes a list of PakRuns: the occurrences of a word
// grouped by package and, within each package, by file.
type HitList []*PakRun
   280  
   281  // PakRuns are sorted by package.
   282  func lessPakRun(x, y interface{}) bool { return x.(*PakRun).Pak.less(y.(*PakRun).Pak) }
   283  
   284  func reduce(h0 RunList) HitList {
   285  	// reduce a list of Spots into a list of FileRuns
   286  	h1 := h0.reduce(lessSpot, newFileRun)
   287  	// reduce a list of FileRuns into a list of PakRuns
   288  	h2 := h1.reduce(lessFileRun, newPakRun)
   289  	// sort the list of PakRuns by package
   290  	h2.sort(lessPakRun)
   291  	// create a HitList
   292  	h := make(HitList, len(h2))
   293  	for i, p := range h2 {
   294  		h[i] = p.(*PakRun)
   295  	}
   296  	return h
   297  }
   298  
   299  // filter returns a new HitList created by filtering
   300  // all PakRuns from h that have a matching pakname.
   301  func (h HitList) filter(pakname string) HitList {
   302  	var hh HitList
   303  	for _, p := range h {
   304  		if p.Pak.Name == pakname {
   305  			hh = append(hh, p)
   306  		}
   307  	}
   308  	return hh
   309  }
   310  
   311  // ----------------------------------------------------------------------------
   312  // AltWords
   313  
// A wordPair associates one spelling of a word with its canonical
// (all lowercase) form.
type wordPair struct {
	canon string // canonical word spelling (all lowercase)
	alt   string // alternative spelling
}
   318  
// An AltWords describes a list of alternative spellings for a
// canonical (all lowercase) spelling of a word.
type AltWords struct {
	Canon string   // canonical word spelling (all lowercase)
	Alts  []string // alternative spellings for the same word
}
   325  
   326  // wordPairs are sorted by their canonical spelling.
   327  func lessWordPair(x, y interface{}) bool { return x.(*wordPair).canon < y.(*wordPair).canon }
   328  
   329  // newAltWords allocates a new AltWords from the *wordPair run h.
   330  func newAltWords(h RunList) interface{} {
   331  	canon := h[0].(*wordPair).canon
   332  	alts := make([]string, len(h))
   333  	for i, x := range h {
   334  		alts[i] = x.(*wordPair).alt
   335  	}
   336  	return &AltWords{canon, alts}
   337  }
   338  
   339  func (a *AltWords) filter(s string) *AltWords {
   340  	var alts []string
   341  	for _, w := range a.Alts {
   342  		if w != s {
   343  			alts = append(alts, w)
   344  		}
   345  	}
   346  	if len(alts) > 0 {
   347  		return &AltWords{a.Canon, alts}
   348  	}
   349  	return nil
   350  }
   351  
// Ident stores information about external identifiers in order to create
// links to package documentation.
type Ident struct {
	Path    string // e.g. "net/http"
	Package string // e.g. "http"
	Name    string // e.g. "NewRequest"
	Doc     string // e.g. "NewRequest returns a new Request..."
}
   360  
// byImportCount sorts the given slice of Idents by the import
// counts of the packages to which they belong: the most imported
// packages come first, and Idents whose packages have the same count
// are ordered by package path.
type byImportCount struct {
	Idents      []Ident        // identifiers to be sorted
	ImportCount map[string]int // package path => import count
}
   367  
   368  func (ic byImportCount) Len() int {
   369  	return len(ic.Idents)
   370  }
   371  
   372  func (ic byImportCount) Less(i, j int) bool {
   373  	ri := ic.ImportCount[ic.Idents[i].Path]
   374  	rj := ic.ImportCount[ic.Idents[j].Path]
   375  	if ri == rj {
   376  		return ic.Idents[i].Path < ic.Idents[j].Path
   377  	}
   378  	return ri > rj
   379  }
   380  
   381  func (ic byImportCount) Swap(i, j int) {
   382  	ic.Idents[i], ic.Idents[j] = ic.Idents[j], ic.Idents[i]
   383  }
   384  
   385  func (ic byImportCount) String() string {
   386  	buf := bytes.NewBuffer([]byte("["))
   387  	for _, v := range ic.Idents {
   388  		buf.WriteString(fmt.Sprintf("\n\t%s, %s (%d)", v.Path, v.Name, ic.ImportCount[v.Path]))
   389  	}
   390  	buf.WriteString("\n]")
   391  	return buf.String()
   392  }
   393  
   394  // filter creates a new Ident list where the results match the given
   395  // package name.
   396  func (ic byImportCount) filter(pakname string) []Ident {
   397  	if ic.Idents == nil {
   398  		return nil
   399  	}
   400  	var res []Ident
   401  	for _, i := range ic.Idents {
   402  		if i.Package == pakname {
   403  			res = append(res, i)
   404  		}
   405  	}
   406  	return res
   407  }
   408  
   409  // top returns the top n identifiers.
   410  func (ic byImportCount) top(n int) []Ident {
   411  	if len(ic.Idents) > n {
   412  		return ic.Idents[:n]
   413  	}
   414  	return ic.Idents
   415  }
   416  
   417  // ----------------------------------------------------------------------------
   418  // Indexer
   419  
// An IndexResult holds the occurrences (Spots) recorded for one word
// while indexing.
type IndexResult struct {
	Decls  RunList // package-level declarations (with snippets)
	Others RunList // all other occurrences
}
   424  
// Statistics provides statistics information for an index.
type Statistics struct {
	Bytes int // total size of indexed source files
	Files int // number of indexed source files
	Lines int // number of lines (all files)
	Words int // number of different identifiers
	Spots int // number of identifier occurrences
}
   433  
// An Indexer maintains the data structures and provides the machinery
// for indexing .go files under a file tree. It implements the path.Visitor
// interface for walking file trees, and the ast.Visitor interface for
// walking Go ASTs.
//
// NOTE(review): only the ast.Visitor implementation (Visit) is visible
// in this file section; the path.Visitor reference looks like it
// predates the current API - confirm.
type Indexer struct {
	c          *Corpus
	fset       *token.FileSet // file set for all indexed files
	fsOpenGate chan bool      // send pre fs.Open; receive on close

	mu            sync.Mutex              // guards all the following
	sources       bytes.Buffer            // concatenated sources
	strings       map[string]string       // interned strings (each value equals its key)
	packages      map[Pak]*Pak            // interned *Paks
	words         map[string]*IndexResult // RunLists of Spots
	snippets      []*Snippet              // indices are stored in SpotInfos
	current       *token.File             // file set entry of the file being indexed
	file          *File                   // name and package of the file being indexed
	decl          ast.Decl                // declaration to take snippets from; nil if no snippet is wanted
	stats         Statistics
	throttle      *util.Throttle
	importCount   map[string]int                 // package path ("net/http") => count
	packagePath   map[string]map[string]bool     // "template" => "text/template" => true
	exports       map[string]map[string]SpotKind // "net/http" => "ListenAndServe" => FuncDecl
	curPkgExports map[string]SpotKind            // declarations seen in the file being indexed; reset per file
	idents        map[SpotKind]map[string][]Ident // kind => name => list of Idents
}
   460  
   461  func (x *Indexer) intern(s string) string {
   462  	if s, ok := x.strings[s]; ok {
   463  		return s
   464  	}
   465  	x.strings[s] = s
   466  	return s
   467  }
   468  
   469  func (x *Indexer) lookupPackage(path, name string) *Pak {
   470  	// In the source directory tree, more than one package may
   471  	// live in the same directory. For the packages map, construct
   472  	// a key that includes both the directory path and the package
   473  	// name.
   474  	key := Pak{Path: x.intern(path), Name: x.intern(name)}
   475  	pak := x.packages[key]
   476  	if pak == nil {
   477  		pak = &key
   478  		x.packages[key] = pak
   479  	}
   480  	return pak
   481  }
   482  
   483  func (x *Indexer) addSnippet(s *Snippet) int {
   484  	index := len(x.snippets)
   485  	x.snippets = append(x.snippets, s)
   486  	return index
   487  }
   488  
   489  func (x *Indexer) visitIdent(kind SpotKind, id *ast.Ident) {
   490  	if id == nil {
   491  		return
   492  	}
   493  	name := x.intern(id.Name)
   494  
   495  	switch kind {
   496  	case TypeDecl, FuncDecl, ConstDecl, VarDecl:
   497  		x.curPkgExports[name] = kind
   498  	}
   499  
   500  	lists, found := x.words[name]
   501  	if !found {
   502  		lists = new(IndexResult)
   503  		x.words[name] = lists
   504  	}
   505  
   506  	if kind == Use || x.decl == nil {
   507  		if x.c.IndexGoCode {
   508  			// not a declaration or no snippet required
   509  			info := makeSpotInfo(kind, x.current.Line(id.Pos()), false)
   510  			lists.Others = append(lists.Others, Spot{x.file, info})
   511  		}
   512  	} else {
   513  		// a declaration with snippet
   514  		index := x.addSnippet(NewSnippet(x.fset, x.decl, id))
   515  		info := makeSpotInfo(kind, index, true)
   516  		lists.Decls = append(lists.Decls, Spot{x.file, info})
   517  	}
   518  
   519  	x.stats.Spots++
   520  }
   521  
   522  func (x *Indexer) visitFieldList(kind SpotKind, flist *ast.FieldList) {
   523  	for _, f := range flist.List {
   524  		x.decl = nil // no snippets for fields
   525  		for _, name := range f.Names {
   526  			x.visitIdent(kind, name)
   527  		}
   528  		ast.Walk(x, f.Type)
   529  		// ignore tag - not indexed at the moment
   530  	}
   531  }
   532  
   533  func (x *Indexer) visitSpec(kind SpotKind, spec ast.Spec) {
   534  	switch n := spec.(type) {
   535  	case *ast.ImportSpec:
   536  		x.visitIdent(ImportDecl, n.Name)
   537  		if n.Path != nil {
   538  			if imp, err := strconv.Unquote(n.Path.Value); err == nil {
   539  				x.importCount[x.intern(imp)]++
   540  			}
   541  		}
   542  
   543  	case *ast.ValueSpec:
   544  		for _, n := range n.Names {
   545  			x.visitIdent(kind, n)
   546  		}
   547  		ast.Walk(x, n.Type)
   548  		for _, v := range n.Values {
   549  			ast.Walk(x, v)
   550  		}
   551  
   552  	case *ast.TypeSpec:
   553  		x.visitIdent(TypeDecl, n.Name)
   554  		ast.Walk(x, n.Type)
   555  	}
   556  }
   557  
   558  func (x *Indexer) visitGenDecl(decl *ast.GenDecl) {
   559  	kind := VarDecl
   560  	if decl.Tok == token.CONST {
   561  		kind = ConstDecl
   562  	}
   563  	x.decl = decl
   564  	for _, s := range decl.Specs {
   565  		x.visitSpec(kind, s)
   566  	}
   567  }
   568  
   569  func (x *Indexer) Visit(node ast.Node) ast.Visitor {
   570  	switch n := node.(type) {
   571  	case nil:
   572  		// nothing to do
   573  
   574  	case *ast.Ident:
   575  		x.visitIdent(Use, n)
   576  
   577  	case *ast.FieldList:
   578  		x.visitFieldList(VarDecl, n)
   579  
   580  	case *ast.InterfaceType:
   581  		x.visitFieldList(MethodDecl, n.Methods)
   582  
   583  	case *ast.DeclStmt:
   584  		// local declarations should only be *ast.GenDecls;
   585  		// ignore incorrect ASTs
   586  		if decl, ok := n.Decl.(*ast.GenDecl); ok {
   587  			x.decl = nil // no snippets for local declarations
   588  			x.visitGenDecl(decl)
   589  		}
   590  
   591  	case *ast.GenDecl:
   592  		x.decl = n
   593  		x.visitGenDecl(n)
   594  
   595  	case *ast.FuncDecl:
   596  		kind := FuncDecl
   597  		if n.Recv != nil {
   598  			kind = MethodDecl
   599  			ast.Walk(x, n.Recv)
   600  		}
   601  		x.decl = n
   602  		x.visitIdent(kind, n.Name)
   603  		ast.Walk(x, n.Type)
   604  		if n.Body != nil {
   605  			ast.Walk(x, n.Body)
   606  		}
   607  
   608  	case *ast.File:
   609  		x.decl = nil
   610  		x.visitIdent(PackageClause, n.Name)
   611  		for _, d := range n.Decls {
   612  			ast.Walk(x, d)
   613  		}
   614  
   615  	default:
   616  		return x
   617  	}
   618  
   619  	return nil
   620  }
   621  
   622  // addFile adds a file to the index if possible and returns the file set file
   623  // and the file's AST if it was successfully parsed as a Go file. If addFile
   624  // failed (that is, if the file was not added), it returns file == nil.
   625  func (x *Indexer) addFile(f vfs.ReadSeekCloser, filename string, goFile bool) (file *token.File, ast *ast.File) {
   626  	defer f.Close()
   627  
   628  	// The file set's base offset and x.sources size must be in lock-step;
   629  	// this permits the direct mapping of suffix array lookup results to
   630  	// to corresponding Pos values.
   631  	//
   632  	// When a file is added to the file set, its offset base increases by
   633  	// the size of the file + 1; and the initial base offset is 1. Add an
   634  	// extra byte to the sources here.
   635  	x.sources.WriteByte(0)
   636  
   637  	// If the sources length doesn't match the file set base at this point
   638  	// the file set implementation changed or we have another error.
   639  	base := x.fset.Base()
   640  	if x.sources.Len() != base {
   641  		panic("internal error: file base incorrect")
   642  	}
   643  
   644  	// append file contents (src) to x.sources
   645  	if _, err := x.sources.ReadFrom(f); err == nil {
   646  		src := x.sources.Bytes()[base:]
   647  
   648  		if goFile {
   649  			// parse the file and in the process add it to the file set
   650  			if ast, err = parser.ParseFile(x.fset, filename, src, parser.ParseComments); err == nil {
   651  				file = x.fset.File(ast.Pos()) // ast.Pos() is inside the file
   652  				return
   653  			}
   654  			// file has parse errors, and the AST may be incorrect -
   655  			// set lines information explicitly and index as ordinary
   656  			// text file (cannot fall through to the text case below
   657  			// because the file has already been added to the file set
   658  			// by the parser)
   659  			file = x.fset.File(token.Pos(base)) // token.Pos(base) is inside the file
   660  			file.SetLinesForContent(src)
   661  			ast = nil
   662  			return
   663  		}
   664  
   665  		if util.IsText(src) {
   666  			// only add the file to the file set (for the full text index)
   667  			file = x.fset.AddFile(filename, x.fset.Base(), len(src))
   668  			file.SetLinesForContent(src)
   669  			return
   670  		}
   671  	}
   672  
   673  	// discard possibly added data
   674  	x.sources.Truncate(base - 1) // -1 to remove added byte 0 since no file was added
   675  	return
   676  }
   677  
   678  // Design note: Using an explicit white list of permitted files for indexing
   679  // makes sure that the important files are included and massively reduces the
   680  // number of files to index. The advantage over a blacklist is that unexpected
   681  // (non-blacklisted) files won't suddenly explode the index.
   682  
// Files are whitelisted if they have a file name or extension
// present as key in whitelisted. Keys starting with a dot are file
// extensions; all other keys are complete names of files that have
// no extension.
var whitelisted = map[string]bool{
	".bash":        true,
	".c":           true,
	".cc":          true,
	".cpp":         true,
	".cxx":         true,
	".css":         true,
	".go":          true,
	".goc":         true,
	".h":           true,
	".hh":          true,
	".hpp":         true,
	".hxx":         true,
	".html":        true,
	".js":          true,
	".out":         true,
	".py":          true,
	".s":           true,
	".sh":          true,
	".txt":         true,
	".xml":         true,
	"AUTHORS":      true,
	"CONTRIBUTORS": true,
	"LICENSE":      true,
	"Makefile":     true,
	"PATENTS":      true,
	"README":       true,
}
   713  
   714  // isWhitelisted returns true if a file is on the list
   715  // of "permitted" files for indexing. The filename must
   716  // be the directory-local name of the file.
   717  func isWhitelisted(filename string) bool {
   718  	key := pathpkg.Ext(filename)
   719  	if key == "" {
   720  		// file has no extension - use entire filename
   721  		key = filename
   722  	}
   723  	return whitelisted[key]
   724  }
   725  
// indexDocs records the documented identifiers of the single parsed
// file astFile in x.idents: the package itself, and the constants,
// types, variables, functions and methods that go/doc reports for it.
// dirname is the directory containing the file and filename its
// directory-local name. Files of package main are not indexed.
func (x *Indexer) indexDocs(dirname string, filename string, astFile *ast.File) {
	pkgName := x.intern(astFile.Name.Name)
	if pkgName == "main" {
		return
	}
	// The package path is the directory name without a leading "/src/"
	// and, after that, without a leading "pkg/".
	pkgPath := x.intern(strings.TrimPrefix(strings.TrimPrefix(dirname, "/src/"), "pkg/"))
	// Build a package that consists of just this file so that go/doc
	// can compute its documentation.
	astPkg := ast.Package{
		Name: pkgName,
		Files: map[string]*ast.File{
			filename: astFile,
		},
	}
	var m doc.Mode
	docPkg := doc.New(&astPkg, dirname, m)
	// addIdent appends an Ident for name to x.idents[sk][name]; the
	// Ident's Doc is the synopsis of docstr.
	addIdent := func(sk SpotKind, name string, docstr string) {
		if x.idents[sk] == nil {
			x.idents[sk] = make(map[string][]Ident)
		}
		name = x.intern(name)
		x.idents[sk][name] = append(x.idents[sk][name], Ident{
			Path:    pkgPath,
			Package: pkgName,
			Name:    name,
			Doc:     doc.Synopsis(docstr),
		})
	}

	if x.idents[PackageClause] == nil {
		x.idents[PackageClause] = make(map[string][]Ident)
	}
	// List of words under which the package identifier will be stored.
	// This includes the package name and the components of the directory
	// in which it resides.
	words := strings.Split(pathpkg.Dir(pkgPath), "/")
	if words[0] == "." {
		// pkgPath has no parent directory
		words = []string{}
	}
	name := x.intern(docPkg.Name)
	synopsis := doc.Synopsis(docPkg.Doc)
	words = append(words, name)
	pkgIdent := Ident{
		Path:    pkgPath,
		Package: pkgName,
		Name:    name,
		Doc:     synopsis,
	}
	for _, word := range words {
		word = x.intern(word)
		found := false
		pkgs := x.idents[PackageClause][word]
		for i, p := range pkgs {
			if p.Path == pkgPath {
				// The package is already listed under this word
				// (recorded for another of its files); only update
				// its synopsis if this file has package documentation.
				if docPkg.Doc != "" {
					p.Doc = synopsis
					pkgs[i] = p
				}
				found = true
				break
			}
		}
		if !found {
			x.idents[PackageClause][word] = append(x.idents[PackageClause][word], pkgIdent)
		}
	}

	for _, c := range docPkg.Consts {
		for _, name := range c.Names {
			addIdent(ConstDecl, name, c.Doc)
		}
	}
	for _, t := range docPkg.Types {
		addIdent(TypeDecl, t.Name, t.Doc)
		// constants, variables, functions and methods that go/doc
		// associates with type t
		for _, c := range t.Consts {
			for _, name := range c.Names {
				addIdent(ConstDecl, name, c.Doc)
			}
		}
		for _, v := range t.Vars {
			for _, name := range v.Names {
				addIdent(VarDecl, name, v.Doc)
			}
		}
		for _, f := range t.Funcs {
			addIdent(FuncDecl, f.Name, f.Doc)
		}
		for _, f := range t.Methods {
			addIdent(MethodDecl, f.Name, f.Doc)
			// Change the name of methods to be "<typename>.<methodname>".
			// They will still be indexed as <methodname>.
			idents := x.idents[MethodDecl][f.Name]
			idents[len(idents)-1].Name = x.intern(t.Name + "." + f.Name)
		}
	}
	for _, v := range docPkg.Vars {
		for _, name := range v.Names {
			addIdent(VarDecl, name, v.Doc)
		}
	}
	for _, f := range docPkg.Funcs {
		addIdent(FuncDecl, f.Name, f.Doc)
	}
}
   828  
   829  func (x *Indexer) indexGoFile(dirname string, filename string, file *token.File, astFile *ast.File) {
   830  	pkgName := astFile.Name.Name
   831  
   832  	if x.c.IndexGoCode {
   833  		x.current = file
   834  		pak := x.lookupPackage(dirname, pkgName)
   835  		x.file = &File{filename, pak}
   836  		ast.Walk(x, astFile)
   837  	}
   838  
   839  	if x.c.IndexDocs {
   840  		// Test files are already filtered out in visitFile if IndexGoCode and
   841  		// IndexFullText are false.  Otherwise, check here.
   842  		isTestFile := (x.c.IndexGoCode || x.c.IndexFullText) &&
   843  			(strings.HasSuffix(filename, "_test.go") || strings.HasPrefix(dirname, "/test/"))
   844  		if !isTestFile {
   845  			x.indexDocs(dirname, filename, astFile)
   846  		}
   847  	}
   848  
   849  	ppKey := x.intern(pkgName)
   850  	if _, ok := x.packagePath[ppKey]; !ok {
   851  		x.packagePath[ppKey] = make(map[string]bool)
   852  	}
   853  	pkgPath := x.intern(strings.TrimPrefix(strings.TrimPrefix(dirname, "/src/"), "pkg/"))
   854  	x.packagePath[ppKey][pkgPath] = true
   855  
   856  	// Merge in exported symbols found walking this file into
   857  	// the map for that package.
   858  	if len(x.curPkgExports) > 0 {
   859  		dest, ok := x.exports[pkgPath]
   860  		if !ok {
   861  			dest = make(map[string]SpotKind)
   862  			x.exports[pkgPath] = dest
   863  		}
   864  		for k, v := range x.curPkgExports {
   865  			dest[k] = v
   866  		}
   867  	}
   868  }
   869  
   870  func (x *Indexer) visitFile(dirname string, fi os.FileInfo) {
   871  	if fi.IsDir() || !x.c.IndexEnabled {
   872  		return
   873  	}
   874  
   875  	filename := pathpkg.Join(dirname, fi.Name())
   876  	goFile := isGoFile(fi)
   877  
   878  	switch {
   879  	case x.c.IndexFullText:
   880  		if !isWhitelisted(fi.Name()) {
   881  			return
   882  		}
   883  	case x.c.IndexGoCode:
   884  		if !goFile {
   885  			return
   886  		}
   887  	case x.c.IndexDocs:
   888  		if !goFile ||
   889  			strings.HasSuffix(fi.Name(), "_test.go") ||
   890  			strings.HasPrefix(dirname, "/test/") {
   891  			return
   892  		}
   893  	default:
   894  		// No indexing turned on.
   895  		return
   896  	}
   897  
   898  	x.fsOpenGate <- true
   899  	defer func() { <-x.fsOpenGate }()
   900  
   901  	// open file
   902  	f, err := x.c.fs.Open(filename)
   903  	if err != nil {
   904  		return
   905  	}
   906  
   907  	x.mu.Lock()
   908  	defer x.mu.Unlock()
   909  
   910  	x.throttle.Throttle()
   911  
   912  	x.curPkgExports = make(map[string]SpotKind)
   913  	file, fast := x.addFile(f, filename, goFile)
   914  	if file == nil {
   915  		return // addFile failed
   916  	}
   917  
   918  	if fast != nil {
   919  		x.indexGoFile(dirname, fi.Name(), file, fast)
   920  	}
   921  
   922  	// update statistics
   923  	x.stats.Bytes += file.Size()
   924  	x.stats.Files++
   925  	x.stats.Lines += file.LineCount()
   926  }
   927  
// indexOptions contains information that affects the contents of an index.
// It is recorded in the Index and compared against the corpus settings by
// Index.CompatibleWith.
type indexOptions struct {
	// Docs provides documentation search results.
	// It is only consulted if IndexEnabled is true.
	// The default value is true.
	Docs bool

	// GoCode provides Go source code search results.
	// It is only consulted if IndexEnabled is true.
	// The default value is true.
	GoCode bool

	// FullText provides search results from all files.
	// It is only consulted if IndexEnabled is true.
	// The default value is true.
	FullText bool

	// MaxResults optionally specifies the maximum results for indexing.
	// The default is 1000.
	MaxResults int
}
   949  
   950  // ----------------------------------------------------------------------------
   951  // Index
   952  
// A LookupResult holds the hits found for a single word, split into
// package-level declarations and all remaining occurrences.
type LookupResult struct {
	Decls  HitList // package-level declarations (with snippets)
	Others HitList // all other occurrences
}
   957  
// An Index is the searchable result of indexing a corpus. It is created by
// Corpus.NewIndex or filled in from a serialized form by Index.ReadFrom.
type Index struct {
	fset        *token.FileSet           // file set used during indexing; nil if no textindex
	suffixes    *suffixarray.Index       // suffixes for concatenated sources; nil if no textindex
	words       map[string]*LookupResult // maps words to hit lists
	alts        map[string]*AltWords     // maps canonical(words) to lists of alternative spellings
	snippets    []*Snippet               // all snippets, indexed by snippet index
	stats       Statistics
	importCount map[string]int                 // package path ("net/http") => count
	packagePath map[string]map[string]bool     // "template" => "text/template" => true
	exports     map[string]map[string]SpotKind // "net/http" => "ListenAndServe" => FuncDecl
	idents      map[SpotKind]map[string][]Ident
	opts        indexOptions // options in effect when the index was built
}
   971  
   972  func canonical(w string) string { return strings.ToLower(w) }
   973  
// Somewhat arbitrary, but I figure low enough to not hurt disk-based filesystems
// consuming file descriptors, where some systems have low 256 or 512 limits.
// Go should have a built-in way to cap fd usage under the ulimit.
const (
	maxOpenFiles = 200 // capacity of the Indexer's fsOpenGate (files opened concurrently)
	maxOpenDirs  = 50  // capacity of NewIndex's dirGate (directories read concurrently)
)
   981  
   982  func (c *Corpus) throttle() float64 {
   983  	if c.IndexThrottle <= 0 {
   984  		return 0.9
   985  	}
   986  	if c.IndexThrottle > 1.0 {
   987  		return 1.0
   988  	}
   989  	return c.IndexThrottle
   990  }
   991  
// NewIndex creates a new index for the .go files provided by the corpus.
//
// It reads every directory delivered by c.fsDirnames (skipping those
// rejected by c.IndexDirectory, if set), visits the files concurrently via
// Indexer.visitFile, and waits for all of them to finish. The collected
// spots are then reduced into the lookup tables of the returned Index.
// A suffix array for full text search is built only if c.IndexFullText is
// set.
func (c *Corpus) NewIndex() *Index {
	// initialize Indexer
	// (use some reasonably sized maps to start)
	x := &Indexer{
		c:           c,
		fset:        token.NewFileSet(),
		fsOpenGate:  make(chan bool, maxOpenFiles),
		strings:     make(map[string]string),
		packages:    make(map[Pak]*Pak, 256),
		words:       make(map[string]*IndexResult, 8192),
		throttle:    util.NewThrottle(c.throttle(), 100*time.Millisecond), // run at least 0.1s at a time
		importCount: make(map[string]int),
		packagePath: make(map[string]map[string]bool),
		exports:     make(map[string]map[string]SpotKind),
		idents:      make(map[SpotKind]map[string][]Ident, 4),
	}

	// index all files in the directories given by dirnames
	var wg sync.WaitGroup // outstanding ReadDir + visitFile
	// dirGate limits the number of directory goroutines in flight to
	// maxOpenDirs; the loop below blocks when the gate is full.
	dirGate := make(chan bool, maxOpenDirs)
	for dirname := range c.fsDirnames() {
		if c.IndexDirectory != nil && !c.IndexDirectory(dirname) {
			continue
		}
		dirGate <- true
		wg.Add(1)
		go func(dirname string) {
			defer func() { <-dirGate }()
			defer wg.Done()

			list, err := c.fs.ReadDir(dirname)
			if err != nil {
				log.Printf("ReadDir(%q): %v; skipping directory", dirname, err)
				return // ignore this directory
			}
			// One goroutine per directory entry; each is registered with
			// wg before this directory goroutine signals Done.
			for _, fi := range list {
				wg.Add(1)
				go func(fi os.FileInfo) {
					defer wg.Done()
					x.visitFile(dirname, fi)
				}(fi)
			}
		}(dirname)
	}
	wg.Wait()

	if !c.IndexFullText {
		// the file set, the current file, and the sources are
		// not needed after indexing if no text index is built -
		// help GC and clear them
		x.fset = nil
		x.sources.Reset()
		x.current = nil // contains reference to fset!
	}

	// for each word, reduce the RunLists into a LookupResult;
	// also collect the word with its canonical spelling in a
	// word list for later computation of alternative spellings
	words := make(map[string]*LookupResult)
	var wlist RunList
	for w, h := range x.words {
		decls := reduce(h.Decls)
		others := reduce(h.Others)
		words[w] = &LookupResult{
			Decls:  decls,
			Others: others,
		}
		wlist = append(wlist, &wordPair{canonical(w), w})
		x.throttle.Throttle()
	}
	x.stats.Words = len(words)

	// reduce the word list {canonical(w), w} into
	// a list of AltWords runs {canonical(w), {w}}
	alist := wlist.reduce(lessWordPair, newAltWords)

	// convert alist into a map of alternative spellings
	alts := make(map[string]*AltWords)
	for i := 0; i < len(alist); i++ {
		a := alist[i].(*AltWords)
		alts[a.Canon] = a
	}

	// create text index
	var suffixes *suffixarray.Index
	if c.IndexFullText {
		suffixes = suffixarray.New(x.sources.Bytes())
	}

	// sort idents by the number of imports of their respective packages
	for _, idMap := range x.idents {
		for _, ir := range idMap {
			sort.Sort(byImportCount{ir, x.importCount})
		}
	}

	// The index records the options it was built with so that
	// CompatibleWith can later compare them against a corpus.
	return &Index{
		fset:        x.fset,
		suffixes:    suffixes,
		words:       words,
		alts:        alts,
		snippets:    x.snippets,
		stats:       x.stats,
		importCount: x.importCount,
		packagePath: x.packagePath,
		exports:     x.exports,
		idents:      x.idents,
		opts: indexOptions{
			Docs:       x.c.IndexDocs,
			GoCode:     x.c.IndexGoCode,
			FullText:   x.c.IndexFullText,
			MaxResults: x.c.MaxResults,
		},
	}
}
  1108  
// ErrFileIndexVersion is returned by Index.ReadFrom when the serialized
// index carries a version other than fileIndexVersion.
var ErrFileIndexVersion = errors.New("file index version out of date")

// fileIndexVersion is the version written into every serialized index and
// required by Index.ReadFrom.
const fileIndexVersion = 3
  1112  
// fileIndex is the subset of Index that's gob-encoded for use by
// Index.Write and Index.Read.
type fileIndex struct {
	Version     int // fileIndexVersion at the time of writing
	Words       map[string]*LookupResult
	Alts        map[string]*AltWords
	Snippets    []*Snippet
	Fulltext    bool // whether a serialized file set and suffix array follow this value
	Stats       Statistics
	ImportCount map[string]int
	PackagePath map[string]map[string]bool
	Exports     map[string]map[string]SpotKind
	Idents      map[SpotKind]map[string][]Ident
	Opts        indexOptions
}
  1128  
  1129  func (x *fileIndex) Write(w io.Writer) error {
  1130  	return gob.NewEncoder(w).Encode(x)
  1131  }
  1132  
  1133  func (x *fileIndex) Read(r io.Reader) error {
  1134  	return gob.NewDecoder(r).Decode(x)
  1135  }
  1136  
  1137  // WriteTo writes the index x to w.
  1138  func (x *Index) WriteTo(w io.Writer) (n int64, err error) {
  1139  	w = countingWriter{&n, w}
  1140  	fulltext := false
  1141  	if x.suffixes != nil {
  1142  		fulltext = true
  1143  	}
  1144  	fx := fileIndex{
  1145  		Version:     fileIndexVersion,
  1146  		Words:       x.words,
  1147  		Alts:        x.alts,
  1148  		Snippets:    x.snippets,
  1149  		Fulltext:    fulltext,
  1150  		Stats:       x.stats,
  1151  		ImportCount: x.importCount,
  1152  		PackagePath: x.packagePath,
  1153  		Exports:     x.exports,
  1154  		Idents:      x.idents,
  1155  		Opts:        x.opts,
  1156  	}
  1157  	if err := fx.Write(w); err != nil {
  1158  		return 0, err
  1159  	}
  1160  	if fulltext {
  1161  		encode := func(x interface{}) error {
  1162  			return gob.NewEncoder(w).Encode(x)
  1163  		}
  1164  		if err := x.fset.Write(encode); err != nil {
  1165  			return 0, err
  1166  		}
  1167  		if err := x.suffixes.Write(w); err != nil {
  1168  			return 0, err
  1169  		}
  1170  	}
  1171  	return n, nil
  1172  }
  1173  
  1174  // ReadFrom reads the index from r into x; x must not be nil.
  1175  // If r does not also implement io.ByteReader, it will be wrapped in a bufio.Reader.
  1176  // If the index is from an old version, the error is ErrFileIndexVersion.
  1177  func (x *Index) ReadFrom(r io.Reader) (n int64, err error) {
  1178  	// We use the ability to read bytes as a plausible surrogate for buffering.
  1179  	if _, ok := r.(io.ByteReader); !ok {
  1180  		r = bufio.NewReader(r)
  1181  	}
  1182  	r = countingReader{&n, r.(byteReader)}
  1183  	var fx fileIndex
  1184  	if err := fx.Read(r); err != nil {
  1185  		return n, err
  1186  	}
  1187  	if fx.Version != fileIndexVersion {
  1188  		return 0, ErrFileIndexVersion
  1189  	}
  1190  	x.words = fx.Words
  1191  	x.alts = fx.Alts
  1192  	x.snippets = fx.Snippets
  1193  	x.stats = fx.Stats
  1194  	x.importCount = fx.ImportCount
  1195  	x.packagePath = fx.PackagePath
  1196  	x.exports = fx.Exports
  1197  	x.idents = fx.Idents
  1198  	x.opts = fx.Opts
  1199  	if fx.Fulltext {
  1200  		x.fset = token.NewFileSet()
  1201  		decode := func(x interface{}) error {
  1202  			return gob.NewDecoder(r).Decode(x)
  1203  		}
  1204  		if err := x.fset.Read(decode); err != nil {
  1205  			return n, err
  1206  		}
  1207  		x.suffixes = new(suffixarray.Index)
  1208  		if err := x.suffixes.Read(r); err != nil {
  1209  			return n, err
  1210  		}
  1211  	}
  1212  	return n, nil
  1213  }
  1214  
// Stats returns index statistics as recorded in the index
// (set when the index was built or read).
func (x *Index) Stats() Statistics {
	return x.stats
}
  1219  
// ImportCount returns a map from import paths to how many times they were seen.
// The map is the index's own map, not a copy.
func (x *Index) ImportCount() map[string]int {
	return x.importCount
}
  1224  
// PackagePath returns a map from short package name to a set
// of full package path names that use that short package name.
// The map is the index's own map, not a copy.
func (x *Index) PackagePath() map[string]map[string]bool {
	return x.packagePath
}
  1230  
// Exports returns a map from full package path to exported
// symbol name to its type.
// The map is the index's own map, not a copy.
func (x *Index) Exports() map[string]map[string]SpotKind {
	return x.exports
}
  1236  
// Idents returns a map from identifier type to exported
// symbol name to the list of identifiers matching that name.
// The map is the index's own map, not a copy.
func (x *Index) Idents() map[SpotKind]map[string][]Ident {
	return x.idents
}
  1242  
  1243  func (x *Index) lookupWord(w string) (match *LookupResult, alt *AltWords) {
  1244  	match = x.words[w]
  1245  	alt = x.alts[canonical(w)]
  1246  	// remove current spelling from alternatives
  1247  	// (if there is no match, the alternatives do
  1248  	// not contain the current spelling)
  1249  	if match != nil && alt != nil {
  1250  		alt = alt.filter(w)
  1251  	}
  1252  	return
  1253  }
  1254  
  1255  // isIdentifier reports whether s is a Go identifier.
  1256  func isIdentifier(s string) bool {
  1257  	for i, ch := range s {
  1258  		if unicode.IsLetter(ch) || ch == '_' || i > 0 && unicode.IsDigit(ch) {
  1259  			continue
  1260  		}
  1261  		return false
  1262  	}
  1263  	return len(s) > 0
  1264  }
  1265  
  1266  // For a given query, which is either a single identifier or a qualified
  1267  // identifier, Lookup returns a SearchResult containing packages, a LookupResult, a
  1268  // list of alternative spellings, and identifiers, if any. Any and all results
  1269  // may be nil.  If the query syntax is wrong, an error is reported.
  1270  func (x *Index) Lookup(query string) (*SearchResult, error) {
  1271  	ss := strings.Split(query, ".")
  1272  
  1273  	// check query syntax
  1274  	for _, s := range ss {
  1275  		if !isIdentifier(s) {
  1276  			return nil, errors.New("all query parts must be identifiers")
  1277  		}
  1278  	}
  1279  	rslt := &SearchResult{
  1280  		Query:  query,
  1281  		Idents: make(map[SpotKind][]Ident, 5),
  1282  	}
  1283  	// handle simple and qualified identifiers
  1284  	switch len(ss) {
  1285  	case 1:
  1286  		ident := ss[0]
  1287  		rslt.Hit, rslt.Alt = x.lookupWord(ident)
  1288  		if rslt.Hit != nil {
  1289  			// found a match - filter packages with same name
  1290  			// for the list of packages called ident, if any
  1291  			rslt.Pak = rslt.Hit.Others.filter(ident)
  1292  		}
  1293  		for k, v := range x.idents {
  1294  			const rsltLimit = 50
  1295  			ids := byImportCount{v[ident], x.importCount}
  1296  			rslt.Idents[k] = ids.top(rsltLimit)
  1297  		}
  1298  
  1299  	case 2:
  1300  		pakname, ident := ss[0], ss[1]
  1301  		rslt.Hit, rslt.Alt = x.lookupWord(ident)
  1302  		if rslt.Hit != nil {
  1303  			// found a match - filter by package name
  1304  			// (no paks - package names are not qualified)
  1305  			decls := rslt.Hit.Decls.filter(pakname)
  1306  			others := rslt.Hit.Others.filter(pakname)
  1307  			rslt.Hit = &LookupResult{decls, others}
  1308  		}
  1309  		for k, v := range x.idents {
  1310  			ids := byImportCount{v[ident], x.importCount}
  1311  			rslt.Idents[k] = ids.filter(pakname)
  1312  		}
  1313  
  1314  	default:
  1315  		return nil, errors.New("query is not a (qualified) identifier")
  1316  	}
  1317  
  1318  	return rslt, nil
  1319  }
  1320  
  1321  func (x *Index) Snippet(i int) *Snippet {
  1322  	// handle illegal snippet indices gracefully
  1323  	if 0 <= i && i < len(x.snippets) {
  1324  		return x.snippets[i]
  1325  	}
  1326  	return nil
  1327  }
  1328  
  1329  type positionList []struct {
  1330  	filename string
  1331  	line     int
  1332  }
  1333  
  1334  func (list positionList) Len() int           { return len(list) }
  1335  func (list positionList) Less(i, j int) bool { return list[i].filename < list[j].filename }
  1336  func (list positionList) Swap(i, j int)      { list[i], list[j] = list[j], list[i] }
  1337  
  1338  // unique returns the list sorted and with duplicate entries removed
  1339  func unique(list []int) []int {
  1340  	sort.Ints(list)
  1341  	var last int
  1342  	i := 0
  1343  	for _, x := range list {
  1344  		if i == 0 || x != last {
  1345  			last = x
  1346  			list[i] = x
  1347  			i++
  1348  		}
  1349  	}
  1350  	return list[0:i]
  1351  }
  1352  
// A FileLines value specifies a file and line numbers within that file.
type FileLines struct {
	Filename string
	Lines    []int // as produced by LookupRegexp: sorted, without duplicates
}
  1358  
// LookupRegexp returns the number of matches and the matches where a regular
// expression r is found in the full text index. At most n matches are
// returned (thus found <= n).
//
// The matches are grouped by file: result holds one FileLines entry per
// file, with that file's matching lines sorted and free of duplicates.
// If the index has no full text index, or if n <= 0, LookupRegexp
// returns zero values.
func (x *Index) LookupRegexp(r *regexp.Regexp, n int) (found int, result []FileLines) {
	if x.suffixes == nil || n <= 0 {
		return
	}
	// n > 0

	var list positionList
	// FindAllIndex may return matches that span across file boundaries.
	// Such matches are unlikely, but after eliminating them we may end up
	// with fewer than n matches. If we don't have enough at the end, redo
	// the search with an increased value n1, but only if FindAllIndex
	// returned all the requested matches in the first place (if it
	// returned fewer than that there cannot be more).
	for n1 := n; found < n; n1 += n - found {
		// each pass starts over and recounts all matches
		found = 0
		matches := x.suffixes.FindAllIndex(r, n1)
		// compute files, exclude matches that span file boundaries,
		// and map offsets to file-local offsets
		list = make(positionList, len(matches))
		for _, m := range matches {
			// by construction, an offset corresponds to the Pos value
			// for the file set - use it to get the file and line
			p := token.Pos(m[0])
			if file := x.fset.File(p); file != nil {
				if base := file.Base(); base <= m[1] && m[1] <= base+file.Size() {
					// match [m[0], m[1]) is within the file boundaries
					list[found].filename = file.Name()
					list[found].line = file.Line(p)
					found++
				}
			}
		}
		if found == n || len(matches) < n1 {
			// found all matches or there's no chance to find more
			break
		}
	}
	// only the first found entries of list were filled in
	list = list[0:found]
	sort.Sort(list) // sort by filename

	// collect matches belonging to the same file
	var last string
	var lines []int
	// addLines flushes the lines collected for file last into result.
	addLines := func() {
		if len(lines) > 0 {
			// remove duplicate lines
			result = append(result, FileLines{last, unique(lines)})
			lines = nil
		}
	}
	for _, m := range list {
		if m.filename != last {
			addLines()
			last = m.filename
		}
		lines = append(lines, m.line)
	}
	addLines()

	return
}
  1424  
// invalidateIndex should be called whenever any of the file systems
// under godoc's observation change so that the indexer is kicked on.
// It updates c.fsModified and refreshes the corpus metadata.
func (c *Corpus) invalidateIndex() {
	c.fsModified.Set(nil)
	c.refreshMetadata()
}
  1431  
  1432  // feedDirnames feeds the directory names of all directories
  1433  // under the file system given by root to channel c.
  1434  //
  1435  func (c *Corpus) feedDirnames(ch chan<- string) {
  1436  	if dir, _ := c.fsTree.Get(); dir != nil {
  1437  		for d := range dir.(*Directory).iter(false) {
  1438  			ch <- d.Path
  1439  		}
  1440  	}
  1441  }
  1442  
  1443  // fsDirnames() returns a channel sending all directory names
  1444  // of all the file systems under godoc's observation.
  1445  //
  1446  func (c *Corpus) fsDirnames() <-chan string {
  1447  	ch := make(chan string, 256) // buffered for fewer context switches
  1448  	go func() {
  1449  		c.feedDirnames(ch)
  1450  		close(ch)
  1451  	}()
  1452  	return ch
  1453  }
  1454  
  1455  // CompatibleWith reports whether the Index x is compatible with the corpus
  1456  // indexing options set in c.
  1457  func (x *Index) CompatibleWith(c *Corpus) bool {
  1458  	return x.opts.Docs == c.IndexDocs &&
  1459  		x.opts.GoCode == c.IndexGoCode &&
  1460  		x.opts.FullText == c.IndexFullText &&
  1461  		x.opts.MaxResults == c.MaxResults
  1462  }
  1463  
  1464  func (c *Corpus) readIndex(filenames string) error {
  1465  	matches, err := filepath.Glob(filenames)
  1466  	if err != nil {
  1467  		return err
  1468  	} else if matches == nil {
  1469  		return fmt.Errorf("no index files match %q", filenames)
  1470  	}
  1471  	sort.Strings(matches) // make sure files are in the right order
  1472  	files := make([]io.Reader, 0, len(matches))
  1473  	for _, filename := range matches {
  1474  		f, err := os.Open(filename)
  1475  		if err != nil {
  1476  			return err
  1477  		}
  1478  		defer f.Close()
  1479  		files = append(files, f)
  1480  	}
  1481  	return c.ReadIndexFrom(io.MultiReader(files...))
  1482  }
  1483  
  1484  // ReadIndexFrom sets the current index from the serialized version found in r.
  1485  func (c *Corpus) ReadIndexFrom(r io.Reader) error {
  1486  	x := new(Index)
  1487  	if _, err := x.ReadFrom(r); err != nil {
  1488  		return err
  1489  	}
  1490  	if !x.CompatibleWith(c) {
  1491  		return fmt.Errorf("index file options are incompatible: %v", x.opts)
  1492  	}
  1493  	c.searchIndex.Set(x)
  1494  	return nil
  1495  }
  1496  
  1497  func (c *Corpus) UpdateIndex() {
  1498  	if c.Verbose {
  1499  		log.Printf("updating index...")
  1500  	}
  1501  	start := time.Now()
  1502  	index := c.NewIndex()
  1503  	stop := time.Now()
  1504  	c.searchIndex.Set(index)
  1505  	if c.Verbose {
  1506  		secs := stop.Sub(start).Seconds()
  1507  		stats := index.Stats()
  1508  		log.Printf("index updated (%gs, %d bytes of source, %d files, %d lines, %d unique words, %d spots)",
  1509  			secs, stats.Bytes, stats.Files, stats.Lines, stats.Words, stats.Spots)
  1510  	}
  1511  	memstats := new(runtime.MemStats)
  1512  	runtime.ReadMemStats(memstats)
  1513  	if c.Verbose {
  1514  		log.Printf("before GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys)
  1515  	}
  1516  	runtime.GC()
  1517  	runtime.ReadMemStats(memstats)
  1518  	if c.Verbose {
  1519  		log.Printf("after  GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys)
  1520  	}
  1521  }
  1522  
  1523  // RunIndexer runs forever, indexing.
  1524  func (c *Corpus) RunIndexer() {
  1525  	// initialize the index from disk if possible
  1526  	if c.IndexFiles != "" {
  1527  		c.initFSTree()
  1528  		if err := c.readIndex(c.IndexFiles); err != nil {
  1529  			log.Printf("error reading index from file %s: %v", c.IndexFiles, err)
  1530  		}
  1531  		return
  1532  	}
  1533  
  1534  	// Repeatedly update the package directory tree and index.
  1535  	for {
  1536  		c.initFSTree()
  1537  		c.UpdateIndex()
  1538  		if c.IndexInterval < 0 {
  1539  			return
  1540  		}
  1541  		delay := 5 * time.Minute // by default, reindex every 5 minutes
  1542  		if c.IndexInterval > 0 {
  1543  			delay = c.IndexInterval
  1544  		}
  1545  		time.Sleep(delay)
  1546  	}
  1547  }
  1548  
  1549  type countingWriter struct {
  1550  	n *int64
  1551  	w io.Writer
  1552  }
  1553  
  1554  func (c countingWriter) Write(p []byte) (n int, err error) {
  1555  	n, err = c.w.Write(p)
  1556  	*c.n += int64(n)
  1557  	return
  1558  }
  1559  
  1560  type byteReader interface {
  1561  	io.Reader
  1562  	io.ByteReader
  1563  }
  1564  
  1565  type countingReader struct {
  1566  	n *int64
  1567  	r byteReader
  1568  }
  1569  
  1570  func (c countingReader) Read(p []byte) (n int, err error) {
  1571  	n, err = c.r.Read(p)
  1572  	*c.n += int64(n)
  1573  	return
  1574  }
  1575  
  1576  func (c countingReader) ReadByte() (b byte, err error) {
  1577  	b, err = c.r.ReadByte()
  1578  	*c.n += 1
  1579  	return
  1580  }