github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/cmd/godoc/index.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This file contains the infrastructure to create an
     6  // identifier and full-text index for a set of Go files.
     7  //
     8  // Algorithm for identifier index:
     9  // - traverse all .go files of the file tree specified by root
    10  // - for each identifier (word) encountered, collect all occurrences (spots)
    11  //   into a list; this produces a list of spots for each word
    12  // - reduce the lists: from a list of spots to a list of FileRuns,
    13  //   and from a list of FileRuns into a list of PakRuns
    14  // - make a HitList from the PakRuns
    15  //
    16  // Details:
    17  // - keep two lists per word: one containing package-level declarations
    18  //   that have snippets, and one containing all other spots
    19  // - keep the snippets in a separate table indexed by snippet index
    20  //   and store the snippet index in place of the line number in a SpotInfo
    21  //   (the line number for spots with snippets is stored in the snippet)
    22  // - at the end, create lists of alternative spellings for a given
    23  //   word
    24  //
    25  // Algorithm for full text index:
    26  // - concatenate all source code in a byte buffer (in memory)
    27  // - add the files to a file set in lockstep as they are added to the byte
    28  //   buffer such that a byte buffer offset corresponds to the Pos value for
    29  //   that file location
    30  // - create a suffix array from the concatenated sources
    31  //
    32  // String lookup in full text index:
    33  // - use the suffix array to lookup a string's offsets - the offsets
    34  //   correspond to the Pos values relative to the file set
    35  // - translate the Pos values back into file and line information and
    36  //   sort the result
    37  
    38  package main
    39  
    40  import (
    41  	"bufio"
    42  	"bytes"
    43  	"encoding/gob"
    44  	"errors"
    45  	"go/ast"
    46  	"go/parser"
    47  	"go/token"
    48  	"index/suffixarray"
    49  	"io"
    50  	"os"
    51  	pathpkg "path"
    52  	"regexp"
    53  	"sort"
    54  	"strings"
    55  	"time"
    56  	"unicode"
    57  )
    58  
    59  // ----------------------------------------------------------------------------
    60  // InterfaceSlice is a helper type for sorting interface
    61  // slices according to some slice-specific sort criteria.
    62  
    63  type Comparer func(x, y interface{}) bool
    64  
    65  type InterfaceSlice struct {
    66  	slice []interface{}
    67  	less  Comparer
    68  }
    69  
    70  func (p *InterfaceSlice) Len() int           { return len(p.slice) }
    71  func (p *InterfaceSlice) Less(i, j int) bool { return p.less(p.slice[i], p.slice[j]) }
    72  func (p *InterfaceSlice) Swap(i, j int)      { p.slice[i], p.slice[j] = p.slice[j], p.slice[i] }
    73  
    74  // ----------------------------------------------------------------------------
    75  // RunList
    76  
    77  // A RunList is a list of entries that can be sorted according to some
    78  // criteria. A RunList may be compressed by grouping "runs" of entries
    79  // which are equal (according to the sort critera) into a new RunList of
    80  // runs. For instance, a RunList containing pairs (x, y) may be compressed
    81  // into a RunList containing pair runs (x, {y}) where each run consists of
    82  // a list of y's with the same x.
    83  type RunList []interface{}
    84  
    85  func (h RunList) sort(less Comparer) {
    86  	sort.Sort(&InterfaceSlice{h, less})
    87  }
    88  
    89  // Compress entries which are the same according to a sort criteria
    90  // (specified by less) into "runs".
    91  func (h RunList) reduce(less Comparer, newRun func(h RunList) interface{}) RunList {
    92  	if len(h) == 0 {
    93  		return nil
    94  	}
    95  	// len(h) > 0
    96  
    97  	// create runs of entries with equal values
    98  	h.sort(less)
    99  
   100  	// for each run, make a new run object and collect them in a new RunList
   101  	var hh RunList
   102  	i, x := 0, h[0]
   103  	for j, y := range h {
   104  		if less(x, y) {
   105  			hh = append(hh, newRun(h[i:j]))
   106  			i, x = j, h[j] // start a new run
   107  		}
   108  	}
   109  	// add final run, if any
   110  	if i < len(h) {
   111  		hh = append(hh, newRun(h[i:]))
   112  	}
   113  
   114  	return hh
   115  }
   116  
   117  // ----------------------------------------------------------------------------
   118  // SpotInfo
   119  
   120  // A SpotInfo value describes a particular identifier spot in a given file;
   121  // It encodes three values: the SpotKind (declaration or use), a line or
   122  // snippet index "lori", and whether it's a line or index.
   123  //
   124  // The following encoding is used:
   125  //
   126  //   bits    32   4    1       0
   127  //   value    [lori|kind|isIndex]
   128  //
   129  type SpotInfo uint32
   130  
   131  // SpotKind describes whether an identifier is declared (and what kind of
   132  // declaration) or used.
   133  type SpotKind uint32
   134  
   135  const (
   136  	PackageClause SpotKind = iota
   137  	ImportDecl
   138  	ConstDecl
   139  	TypeDecl
   140  	VarDecl
   141  	FuncDecl
   142  	MethodDecl
   143  	Use
   144  	nKinds
   145  )
   146  
   147  func init() {
   148  	// sanity check: if nKinds is too large, the SpotInfo
   149  	// accessor functions may need to be updated
   150  	if nKinds > 8 {
   151  		panic("internal error: nKinds > 8")
   152  	}
   153  }
   154  
   155  // makeSpotInfo makes a SpotInfo.
   156  func makeSpotInfo(kind SpotKind, lori int, isIndex bool) SpotInfo {
   157  	// encode lori: bits [4..32)
   158  	x := SpotInfo(lori) << 4
   159  	if int(x>>4) != lori {
   160  		// lori value doesn't fit - since snippet indices are
   161  		// most certainly always smaller then 1<<28, this can
   162  		// only happen for line numbers; give it no line number (= 0)
   163  		x = 0
   164  	}
   165  	// encode kind: bits [1..4)
   166  	x |= SpotInfo(kind) << 1
   167  	// encode isIndex: bit 0
   168  	if isIndex {
   169  		x |= 1
   170  	}
   171  	return x
   172  }
   173  
   174  func (x SpotInfo) Kind() SpotKind { return SpotKind(x >> 1 & 7) }
   175  func (x SpotInfo) Lori() int      { return int(x >> 4) }
   176  func (x SpotInfo) IsIndex() bool  { return x&1 != 0 }
   177  
   178  // ----------------------------------------------------------------------------
   179  // KindRun
   180  
   181  // Debugging support. Disable to see multiple entries per line.
   182  const removeDuplicates = true
   183  
   184  // A KindRun is a run of SpotInfos of the same kind in a given file.
   185  // The kind (3 bits) is stored in each SpotInfo element; to find the
   186  // kind of a KindRun, look at any of it's elements.
   187  type KindRun []SpotInfo
   188  
   189  // KindRuns are sorted by line number or index. Since the isIndex bit
   190  // is always the same for all infos in one list we can compare lori's.
   191  func (k KindRun) Len() int           { return len(k) }
   192  func (k KindRun) Less(i, j int) bool { return k[i].Lori() < k[j].Lori() }
   193  func (k KindRun) Swap(i, j int)      { k[i], k[j] = k[j], k[i] }
   194  
   195  // FileRun contents are sorted by Kind for the reduction into KindRuns.
   196  func lessKind(x, y interface{}) bool { return x.(SpotInfo).Kind() < y.(SpotInfo).Kind() }
   197  
   198  // newKindRun allocates a new KindRun from the SpotInfo run h.
   199  func newKindRun(h RunList) interface{} {
   200  	run := make(KindRun, len(h))
   201  	for i, x := range h {
   202  		run[i] = x.(SpotInfo)
   203  	}
   204  
   205  	// Spots were sorted by file and kind to create this run.
   206  	// Within this run, sort them by line number or index.
   207  	sort.Sort(run)
   208  
   209  	if removeDuplicates {
   210  		// Since both the lori and kind field must be
   211  		// same for duplicates, and since the isIndex
   212  		// bit is always the same for all infos in one
   213  		// list we can simply compare the entire info.
   214  		k := 0
   215  		prev := SpotInfo(1<<32 - 1) // an unlikely value
   216  		for _, x := range run {
   217  			if x != prev {
   218  				run[k] = x
   219  				k++
   220  				prev = x
   221  			}
   222  		}
   223  		run = run[0:k]
   224  	}
   225  
   226  	return run
   227  }
   228  
   229  // ----------------------------------------------------------------------------
   230  // FileRun
   231  
   232  // A Pak describes a Go package.
   233  type Pak struct {
   234  	Path string // path of directory containing the package
   235  	Name string // package name as declared by package clause
   236  }
   237  
   238  // Paks are sorted by name (primary key) and by import path (secondary key).
   239  func (p *Pak) less(q *Pak) bool {
   240  	return p.Name < q.Name || p.Name == q.Name && p.Path < q.Path
   241  }
   242  
   243  // A File describes a Go file.
   244  type File struct {
   245  	Name string // directory-local file name
   246  	Pak  *Pak   // the package to which the file belongs
   247  }
   248  
   249  // Path returns the file path of f.
   250  func (f *File) Path() string {
   251  	return pathpkg.Join(f.Pak.Path, f.Name)
   252  }
   253  
   254  // A Spot describes a single occurrence of a word.
   255  type Spot struct {
   256  	File *File
   257  	Info SpotInfo
   258  }
   259  
   260  // A FileRun is a list of KindRuns belonging to the same file.
   261  type FileRun struct {
   262  	File   *File
   263  	Groups []KindRun
   264  }
   265  
   266  // Spots are sorted by file path for the reduction into FileRuns.
   267  func lessSpot(x, y interface{}) bool {
   268  	fx := x.(Spot).File
   269  	fy := y.(Spot).File
   270  	// same as "return fx.Path() < fy.Path()" but w/o computing the file path first
   271  	px := fx.Pak.Path
   272  	py := fy.Pak.Path
   273  	return px < py || px == py && fx.Name < fy.Name
   274  }
   275  
   276  // newFileRun allocates a new FileRun from the Spot run h.
   277  func newFileRun(h RunList) interface{} {
   278  	file := h[0].(Spot).File
   279  
   280  	// reduce the list of Spots into a list of KindRuns
   281  	h1 := make(RunList, len(h))
   282  	for i, x := range h {
   283  		h1[i] = x.(Spot).Info
   284  	}
   285  	h2 := h1.reduce(lessKind, newKindRun)
   286  
   287  	// create the FileRun
   288  	groups := make([]KindRun, len(h2))
   289  	for i, x := range h2 {
   290  		groups[i] = x.(KindRun)
   291  	}
   292  	return &FileRun{file, groups}
   293  }
   294  
   295  // ----------------------------------------------------------------------------
   296  // PakRun
   297  
   298  // A PakRun describes a run of *FileRuns of a package.
   299  type PakRun struct {
   300  	Pak   *Pak
   301  	Files []*FileRun
   302  }
   303  
   304  // Sorting support for files within a PakRun.
   305  func (p *PakRun) Len() int           { return len(p.Files) }
   306  func (p *PakRun) Less(i, j int) bool { return p.Files[i].File.Name < p.Files[j].File.Name }
   307  func (p *PakRun) Swap(i, j int)      { p.Files[i], p.Files[j] = p.Files[j], p.Files[i] }
   308  
   309  // FileRuns are sorted by package for the reduction into PakRuns.
   310  func lessFileRun(x, y interface{}) bool {
   311  	return x.(*FileRun).File.Pak.less(y.(*FileRun).File.Pak)
   312  }
   313  
   314  // newPakRun allocates a new PakRun from the *FileRun run h.
   315  func newPakRun(h RunList) interface{} {
   316  	pak := h[0].(*FileRun).File.Pak
   317  	files := make([]*FileRun, len(h))
   318  	for i, x := range h {
   319  		files[i] = x.(*FileRun)
   320  	}
   321  	run := &PakRun{pak, files}
   322  	sort.Sort(run) // files were sorted by package; sort them by file now
   323  	return run
   324  }
   325  
   326  // ----------------------------------------------------------------------------
   327  // HitList
   328  
   329  // A HitList describes a list of PakRuns.
   330  type HitList []*PakRun
   331  
   332  // PakRuns are sorted by package.
   333  func lessPakRun(x, y interface{}) bool { return x.(*PakRun).Pak.less(y.(*PakRun).Pak) }
   334  
   335  func reduce(h0 RunList) HitList {
   336  	// reduce a list of Spots into a list of FileRuns
   337  	h1 := h0.reduce(lessSpot, newFileRun)
   338  	// reduce a list of FileRuns into a list of PakRuns
   339  	h2 := h1.reduce(lessFileRun, newPakRun)
   340  	// sort the list of PakRuns by package
   341  	h2.sort(lessPakRun)
   342  	// create a HitList
   343  	h := make(HitList, len(h2))
   344  	for i, p := range h2 {
   345  		h[i] = p.(*PakRun)
   346  	}
   347  	return h
   348  }
   349  
   350  // filter returns a new HitList created by filtering
   351  // all PakRuns from h that have a matching pakname.
   352  func (h HitList) filter(pakname string) HitList {
   353  	var hh HitList
   354  	for _, p := range h {
   355  		if p.Pak.Name == pakname {
   356  			hh = append(hh, p)
   357  		}
   358  	}
   359  	return hh
   360  }
   361  
   362  // ----------------------------------------------------------------------------
   363  // AltWords
   364  
   365  type wordPair struct {
   366  	canon string // canonical word spelling (all lowercase)
   367  	alt   string // alternative spelling
   368  }
   369  
   370  // An AltWords describes a list of alternative spellings for a
   371  // canonical (all lowercase) spelling of a word.
   372  type AltWords struct {
   373  	Canon string   // canonical word spelling (all lowercase)
   374  	Alts  []string // alternative spelling for the same word
   375  }
   376  
   377  // wordPairs are sorted by their canonical spelling.
   378  func lessWordPair(x, y interface{}) bool { return x.(*wordPair).canon < y.(*wordPair).canon }
   379  
   380  // newAltWords allocates a new AltWords from the *wordPair run h.
   381  func newAltWords(h RunList) interface{} {
   382  	canon := h[0].(*wordPair).canon
   383  	alts := make([]string, len(h))
   384  	for i, x := range h {
   385  		alts[i] = x.(*wordPair).alt
   386  	}
   387  	return &AltWords{canon, alts}
   388  }
   389  
   390  func (a *AltWords) filter(s string) *AltWords {
   391  	var alts []string
   392  	for _, w := range a.Alts {
   393  		if w != s {
   394  			alts = append(alts, w)
   395  		}
   396  	}
   397  	if len(alts) > 0 {
   398  		return &AltWords{a.Canon, alts}
   399  	}
   400  	return nil
   401  }
   402  
   403  // ----------------------------------------------------------------------------
   404  // Indexer
   405  
   406  // Adjust these flags as seems best.
   407  const includeMainPackages = true
   408  const includeTestFiles = true
   409  
   410  type IndexResult struct {
   411  	Decls  RunList // package-level declarations (with snippets)
   412  	Others RunList // all other occurrences
   413  }
   414  
   415  // Statistics provides statistics information for an index.
   416  type Statistics struct {
   417  	Bytes int // total size of indexed source files
   418  	Files int // number of indexed source files
   419  	Lines int // number of lines (all files)
   420  	Words int // number of different identifiers
   421  	Spots int // number of identifier occurrences
   422  }
   423  
   424  // An Indexer maintains the data structures and provides the machinery
   425  // for indexing .go files under a file tree. It implements the path.Visitor
   426  // interface for walking file trees, and the ast.Visitor interface for
   427  // walking Go ASTs.
   428  type Indexer struct {
   429  	fset     *token.FileSet          // file set for all indexed files
   430  	sources  bytes.Buffer            // concatenated sources
   431  	packages map[string]*Pak         // map of canonicalized *Paks
   432  	words    map[string]*IndexResult // RunLists of Spots
   433  	snippets []*Snippet              // indices are stored in SpotInfos
   434  	current  *token.File             // last file added to file set
   435  	file     *File                   // AST for current file
   436  	decl     ast.Decl                // AST for current decl
   437  	stats    Statistics
   438  }
   439  
   440  func (x *Indexer) lookupPackage(path, name string) *Pak {
   441  	// In the source directory tree, more than one package may
   442  	// live in the same directory. For the packages map, construct
   443  	// a key that includes both the directory path and the package
   444  	// name.
   445  	key := path + ":" + name
   446  	pak := x.packages[key]
   447  	if pak == nil {
   448  		pak = &Pak{path, name}
   449  		x.packages[key] = pak
   450  	}
   451  	return pak
   452  }
   453  
   454  func (x *Indexer) addSnippet(s *Snippet) int {
   455  	index := len(x.snippets)
   456  	x.snippets = append(x.snippets, s)
   457  	return index
   458  }
   459  
   460  func (x *Indexer) visitIdent(kind SpotKind, id *ast.Ident) {
   461  	if id != nil {
   462  		lists, found := x.words[id.Name]
   463  		if !found {
   464  			lists = new(IndexResult)
   465  			x.words[id.Name] = lists
   466  		}
   467  
   468  		if kind == Use || x.decl == nil {
   469  			// not a declaration or no snippet required
   470  			info := makeSpotInfo(kind, x.current.Line(id.Pos()), false)
   471  			lists.Others = append(lists.Others, Spot{x.file, info})
   472  		} else {
   473  			// a declaration with snippet
   474  			index := x.addSnippet(NewSnippet(x.fset, x.decl, id))
   475  			info := makeSpotInfo(kind, index, true)
   476  			lists.Decls = append(lists.Decls, Spot{x.file, info})
   477  		}
   478  
   479  		x.stats.Spots++
   480  	}
   481  }
   482  
   483  func (x *Indexer) visitFieldList(kind SpotKind, list *ast.FieldList) {
   484  	for _, f := range list.List {
   485  		x.decl = nil // no snippets for fields
   486  		for _, name := range f.Names {
   487  			x.visitIdent(kind, name)
   488  		}
   489  		ast.Walk(x, f.Type)
   490  		// ignore tag - not indexed at the moment
   491  	}
   492  }
   493  
   494  func (x *Indexer) visitSpec(kind SpotKind, spec ast.Spec) {
   495  	switch n := spec.(type) {
   496  	case *ast.ImportSpec:
   497  		x.visitIdent(ImportDecl, n.Name)
   498  		// ignore path - not indexed at the moment
   499  
   500  	case *ast.ValueSpec:
   501  		for _, n := range n.Names {
   502  			x.visitIdent(kind, n)
   503  		}
   504  		ast.Walk(x, n.Type)
   505  		for _, v := range n.Values {
   506  			ast.Walk(x, v)
   507  		}
   508  
   509  	case *ast.TypeSpec:
   510  		x.visitIdent(TypeDecl, n.Name)
   511  		ast.Walk(x, n.Type)
   512  	}
   513  }
   514  
   515  func (x *Indexer) visitGenDecl(decl *ast.GenDecl) {
   516  	kind := VarDecl
   517  	if decl.Tok == token.CONST {
   518  		kind = ConstDecl
   519  	}
   520  	x.decl = decl
   521  	for _, s := range decl.Specs {
   522  		x.visitSpec(kind, s)
   523  	}
   524  }
   525  
   526  func (x *Indexer) Visit(node ast.Node) ast.Visitor {
   527  	switch n := node.(type) {
   528  	case nil:
   529  		// nothing to do
   530  
   531  	case *ast.Ident:
   532  		x.visitIdent(Use, n)
   533  
   534  	case *ast.FieldList:
   535  		x.visitFieldList(VarDecl, n)
   536  
   537  	case *ast.InterfaceType:
   538  		x.visitFieldList(MethodDecl, n.Methods)
   539  
   540  	case *ast.DeclStmt:
   541  		// local declarations should only be *ast.GenDecls;
   542  		// ignore incorrect ASTs
   543  		if decl, ok := n.Decl.(*ast.GenDecl); ok {
   544  			x.decl = nil // no snippets for local declarations
   545  			x.visitGenDecl(decl)
   546  		}
   547  
   548  	case *ast.GenDecl:
   549  		x.decl = n
   550  		x.visitGenDecl(n)
   551  
   552  	case *ast.FuncDecl:
   553  		kind := FuncDecl
   554  		if n.Recv != nil {
   555  			kind = MethodDecl
   556  			ast.Walk(x, n.Recv)
   557  		}
   558  		x.decl = n
   559  		x.visitIdent(kind, n.Name)
   560  		ast.Walk(x, n.Type)
   561  		if n.Body != nil {
   562  			ast.Walk(x, n.Body)
   563  		}
   564  
   565  	case *ast.File:
   566  		x.decl = nil
   567  		x.visitIdent(PackageClause, n.Name)
   568  		for _, d := range n.Decls {
   569  			ast.Walk(x, d)
   570  		}
   571  
   572  	default:
   573  		return x
   574  	}
   575  
   576  	return nil
   577  }
   578  
   579  func pkgName(filename string) string {
   580  	// use a new file set each time in order to not pollute the indexer's
   581  	// file set (which must stay in sync with the concatenated source code)
   582  	file, err := parser.ParseFile(token.NewFileSet(), filename, nil, parser.PackageClauseOnly)
   583  	if err != nil || file == nil {
   584  		return ""
   585  	}
   586  	return file.Name.Name
   587  }
   588  
   589  // addFile adds a file to the index if possible and returns the file set file
   590  // and the file's AST if it was successfully parsed as a Go file. If addFile
   591  // failed (that is, if the file was not added), it returns file == nil.
   592  func (x *Indexer) addFile(filename string, goFile bool) (file *token.File, ast *ast.File) {
   593  	// open file
   594  	f, err := fs.Open(filename)
   595  	if err != nil {
   596  		return
   597  	}
   598  	defer f.Close()
   599  
   600  	// The file set's base offset and x.sources size must be in lock-step;
   601  	// this permits the direct mapping of suffix array lookup results to
   602  	// to corresponding Pos values.
   603  	//
   604  	// When a file is added to the file set, its offset base increases by
   605  	// the size of the file + 1; and the initial base offset is 1. Add an
   606  	// extra byte to the sources here.
   607  	x.sources.WriteByte(0)
   608  
   609  	// If the sources length doesn't match the file set base at this point
   610  	// the file set implementation changed or we have another error.
   611  	base := x.fset.Base()
   612  	if x.sources.Len() != base {
   613  		panic("internal error: file base incorrect")
   614  	}
   615  
   616  	// append file contents (src) to x.sources
   617  	if _, err := x.sources.ReadFrom(f); err == nil {
   618  		src := x.sources.Bytes()[base:]
   619  
   620  		if goFile {
   621  			// parse the file and in the process add it to the file set
   622  			if ast, err = parser.ParseFile(x.fset, filename, src, parser.ParseComments); err == nil {
   623  				file = x.fset.File(ast.Pos()) // ast.Pos() is inside the file
   624  				return
   625  			}
   626  			// file has parse errors, and the AST may be incorrect -
   627  			// set lines information explicitly and index as ordinary
   628  			// text file (cannot fall through to the text case below
   629  			// because the file has already been added to the file set
   630  			// by the parser)
   631  			file = x.fset.File(token.Pos(base)) // token.Pos(base) is inside the file
   632  			file.SetLinesForContent(src)
   633  			ast = nil
   634  			return
   635  		}
   636  
   637  		if isText(src) {
   638  			// only add the file to the file set (for the full text index)
   639  			file = x.fset.AddFile(filename, x.fset.Base(), len(src))
   640  			file.SetLinesForContent(src)
   641  			return
   642  		}
   643  	}
   644  
   645  	// discard possibly added data
   646  	x.sources.Truncate(base - 1) // -1 to remove added byte 0 since no file was added
   647  	return
   648  }
   649  
   650  // Design note: Using an explicit white list of permitted files for indexing
   651  // makes sure that the important files are included and massively reduces the
   652  // number of files to index. The advantage over a blacklist is that unexpected
   653  // (non-blacklisted) files won't suddenly explode the index.
   654  
   655  // Files are whitelisted if they have a file name or extension
   656  // present as key in whitelisted.
   657  var whitelisted = map[string]bool{
   658  	".bash":        true,
   659  	".c":           true,
   660  	".css":         true,
   661  	".go":          true,
   662  	".goc":         true,
   663  	".h":           true,
   664  	".html":        true,
   665  	".js":          true,
   666  	".out":         true,
   667  	".py":          true,
   668  	".s":           true,
   669  	".sh":          true,
   670  	".txt":         true,
   671  	".xml":         true,
   672  	"AUTHORS":      true,
   673  	"CONTRIBUTORS": true,
   674  	"LICENSE":      true,
   675  	"Makefile":     true,
   676  	"PATENTS":      true,
   677  	"README":       true,
   678  }
   679  
   680  // isWhitelisted returns true if a file is on the list
   681  // of "permitted" files for indexing. The filename must
   682  // be the directory-local name of the file.
   683  func isWhitelisted(filename string) bool {
   684  	key := pathpkg.Ext(filename)
   685  	if key == "" {
   686  		// file has no extension - use entire filename
   687  		key = filename
   688  	}
   689  	return whitelisted[key]
   690  }
   691  
   692  func (x *Indexer) visitFile(dirname string, f os.FileInfo, fulltextIndex bool) {
   693  	if f.IsDir() {
   694  		return
   695  	}
   696  
   697  	filename := pathpkg.Join(dirname, f.Name())
   698  	goFile := false
   699  
   700  	switch {
   701  	case isGoFile(f):
   702  		if !includeTestFiles && (!isPkgFile(f) || strings.HasPrefix(filename, "test/")) {
   703  			return
   704  		}
   705  		if !includeMainPackages && pkgName(filename) == "main" {
   706  			return
   707  		}
   708  		goFile = true
   709  
   710  	case !fulltextIndex || !isWhitelisted(f.Name()):
   711  		return
   712  	}
   713  
   714  	file, fast := x.addFile(filename, goFile)
   715  	if file == nil {
   716  		return // addFile failed
   717  	}
   718  
   719  	if fast != nil {
   720  		// we've got a Go file to index
   721  		x.current = file
   722  		pak := x.lookupPackage(dirname, fast.Name.Name)
   723  		x.file = &File{f.Name(), pak}
   724  		ast.Walk(x, fast)
   725  	}
   726  
   727  	// update statistics
   728  	x.stats.Bytes += file.Size()
   729  	x.stats.Files++
   730  	x.stats.Lines += file.LineCount()
   731  }
   732  
   733  // ----------------------------------------------------------------------------
   734  // Index
   735  
   736  type LookupResult struct {
   737  	Decls  HitList // package-level declarations (with snippets)
   738  	Others HitList // all other occurrences
   739  }
   740  
   741  type Index struct {
   742  	fset     *token.FileSet           // file set used during indexing; nil if no textindex
   743  	suffixes *suffixarray.Index       // suffixes for concatenated sources; nil if no textindex
   744  	words    map[string]*LookupResult // maps words to hit lists
   745  	alts     map[string]*AltWords     // maps canonical(words) to lists of alternative spellings
   746  	snippets []*Snippet               // all snippets, indexed by snippet index
   747  	stats    Statistics
   748  }
   749  
   750  func canonical(w string) string { return strings.ToLower(w) }
   751  
   752  // NewIndex creates a new index for the .go files
   753  // in the directories given by dirnames.
   754  //
   755  func NewIndex(dirnames <-chan string, fulltextIndex bool, throttle float64) *Index {
   756  	var x Indexer
   757  	th := NewThrottle(throttle, 100*time.Millisecond) // run at least 0.1s at a time
   758  
   759  	// initialize Indexer
   760  	// (use some reasonably sized maps to start)
   761  	x.fset = token.NewFileSet()
   762  	x.packages = make(map[string]*Pak, 256)
   763  	x.words = make(map[string]*IndexResult, 8192)
   764  
   765  	// index all files in the directories given by dirnames
   766  	for dirname := range dirnames {
   767  		list, err := fs.ReadDir(dirname)
   768  		if err != nil {
   769  			continue // ignore this directory
   770  		}
   771  		for _, f := range list {
   772  			if !f.IsDir() {
   773  				x.visitFile(dirname, f, fulltextIndex)
   774  			}
   775  			th.Throttle()
   776  		}
   777  	}
   778  
   779  	if !fulltextIndex {
   780  		// the file set, the current file, and the sources are
   781  		// not needed after indexing if no text index is built -
   782  		// help GC and clear them
   783  		x.fset = nil
   784  		x.sources.Reset()
   785  		x.current = nil // contains reference to fset!
   786  	}
   787  
   788  	// for each word, reduce the RunLists into a LookupResult;
   789  	// also collect the word with its canonical spelling in a
   790  	// word list for later computation of alternative spellings
   791  	words := make(map[string]*LookupResult)
   792  	var wlist RunList
   793  	for w, h := range x.words {
   794  		decls := reduce(h.Decls)
   795  		others := reduce(h.Others)
   796  		words[w] = &LookupResult{
   797  			Decls:  decls,
   798  			Others: others,
   799  		}
   800  		wlist = append(wlist, &wordPair{canonical(w), w})
   801  		th.Throttle()
   802  	}
   803  	x.stats.Words = len(words)
   804  
   805  	// reduce the word list {canonical(w), w} into
   806  	// a list of AltWords runs {canonical(w), {w}}
   807  	alist := wlist.reduce(lessWordPair, newAltWords)
   808  
   809  	// convert alist into a map of alternative spellings
   810  	alts := make(map[string]*AltWords)
   811  	for i := 0; i < len(alist); i++ {
   812  		a := alist[i].(*AltWords)
   813  		alts[a.Canon] = a
   814  	}
   815  
   816  	// create text index
   817  	var suffixes *suffixarray.Index
   818  	if fulltextIndex {
   819  		suffixes = suffixarray.New(x.sources.Bytes())
   820  	}
   821  
   822  	return &Index{x.fset, suffixes, words, alts, x.snippets, x.stats}
   823  }
   824  
   825  type fileIndex struct {
   826  	Words    map[string]*LookupResult
   827  	Alts     map[string]*AltWords
   828  	Snippets []*Snippet
   829  	Fulltext bool
   830  }
   831  
   832  func (x *fileIndex) Write(w io.Writer) error {
   833  	return gob.NewEncoder(w).Encode(x)
   834  }
   835  
   836  func (x *fileIndex) Read(r io.Reader) error {
   837  	return gob.NewDecoder(r).Decode(x)
   838  }
   839  
   840  // Write writes the index x to w.
   841  func (x *Index) Write(w io.Writer) error {
   842  	fulltext := false
   843  	if x.suffixes != nil {
   844  		fulltext = true
   845  	}
   846  	fx := fileIndex{
   847  		x.words,
   848  		x.alts,
   849  		x.snippets,
   850  		fulltext,
   851  	}
   852  	if err := fx.Write(w); err != nil {
   853  		return err
   854  	}
   855  	if fulltext {
   856  		encode := func(x interface{}) error {
   857  			return gob.NewEncoder(w).Encode(x)
   858  		}
   859  		if err := x.fset.Write(encode); err != nil {
   860  			return err
   861  		}
   862  		if err := x.suffixes.Write(w); err != nil {
   863  			return err
   864  		}
   865  	}
   866  	return nil
   867  }
   868  
   869  // Read reads the index from r into x; x must not be nil.
   870  // If r does not also implement io.ByteReader, it will be wrapped in a bufio.Reader.
   871  func (x *Index) Read(r io.Reader) error {
   872  	// We use the ability to read bytes as a plausible surrogate for buffering.
   873  	if _, ok := r.(io.ByteReader); !ok {
   874  		r = bufio.NewReader(r)
   875  	}
   876  	var fx fileIndex
   877  	if err := fx.Read(r); err != nil {
   878  		return err
   879  	}
   880  	x.words = fx.Words
   881  	x.alts = fx.Alts
   882  	x.snippets = fx.Snippets
   883  	if fx.Fulltext {
   884  		x.fset = token.NewFileSet()
   885  		decode := func(x interface{}) error {
   886  			return gob.NewDecoder(r).Decode(x)
   887  		}
   888  		if err := x.fset.Read(decode); err != nil {
   889  			return err
   890  		}
   891  		x.suffixes = new(suffixarray.Index)
   892  		if err := x.suffixes.Read(r); err != nil {
   893  			return err
   894  		}
   895  	}
   896  	return nil
   897  }
   898  
   899  // Stats() returns index statistics.
   900  func (x *Index) Stats() Statistics {
   901  	return x.stats
   902  }
   903  
   904  func (x *Index) lookupWord(w string) (match *LookupResult, alt *AltWords) {
   905  	match = x.words[w]
   906  	alt = x.alts[canonical(w)]
   907  	// remove current spelling from alternatives
   908  	// (if there is no match, the alternatives do
   909  	// not contain the current spelling)
   910  	if match != nil && alt != nil {
   911  		alt = alt.filter(w)
   912  	}
   913  	return
   914  }
   915  
   916  // isIdentifier reports whether s is a Go identifier.
   917  func isIdentifier(s string) bool {
   918  	for i, ch := range s {
   919  		if unicode.IsLetter(ch) || ch == ' ' || i > 0 && unicode.IsDigit(ch) {
   920  			continue
   921  		}
   922  		return false
   923  	}
   924  	return len(s) > 0
   925  }
   926  
   927  // For a given query, which is either a single identifier or a qualified
   928  // identifier, Lookup returns a list of packages, a LookupResult, and a
   929  // list of alternative spellings, if any. Any and all results may be nil.
   930  // If the query syntax is wrong, an error is reported.
   931  func (x *Index) Lookup(query string) (paks HitList, match *LookupResult, alt *AltWords, err error) {
   932  	ss := strings.Split(query, ".")
   933  
   934  	// check query syntax
   935  	for _, s := range ss {
   936  		if !isIdentifier(s) {
   937  			err = errors.New("all query parts must be identifiers")
   938  			return
   939  		}
   940  	}
   941  
   942  	// handle simple and qualified identifiers
   943  	switch len(ss) {
   944  	case 1:
   945  		ident := ss[0]
   946  		match, alt = x.lookupWord(ident)
   947  		if match != nil {
   948  			// found a match - filter packages with same name
   949  			// for the list of packages called ident, if any
   950  			paks = match.Others.filter(ident)
   951  		}
   952  
   953  	case 2:
   954  		pakname, ident := ss[0], ss[1]
   955  		match, alt = x.lookupWord(ident)
   956  		if match != nil {
   957  			// found a match - filter by package name
   958  			// (no paks - package names are not qualified)
   959  			decls := match.Decls.filter(pakname)
   960  			others := match.Others.filter(pakname)
   961  			match = &LookupResult{decls, others}
   962  		}
   963  
   964  	default:
   965  		err = errors.New("query is not a (qualified) identifier")
   966  	}
   967  
   968  	return
   969  }
   970  
   971  func (x *Index) Snippet(i int) *Snippet {
   972  	// handle illegal snippet indices gracefully
   973  	if 0 <= i && i < len(x.snippets) {
   974  		return x.snippets[i]
   975  	}
   976  	return nil
   977  }
   978  
   979  type positionList []struct {
   980  	filename string
   981  	line     int
   982  }
   983  
   984  func (list positionList) Len() int           { return len(list) }
   985  func (list positionList) Less(i, j int) bool { return list[i].filename < list[j].filename }
   986  func (list positionList) Swap(i, j int)      { list[i], list[j] = list[j], list[i] }
   987  
   988  // unique returns the list sorted and with duplicate entries removed
   989  func unique(list []int) []int {
   990  	sort.Ints(list)
   991  	var last int
   992  	i := 0
   993  	for _, x := range list {
   994  		if i == 0 || x != last {
   995  			last = x
   996  			list[i] = x
   997  			i++
   998  		}
   999  	}
  1000  	return list[0:i]
  1001  }
  1002  
// A FileLines value specifies a file and line numbers within that file.
// LookupRegexp returns its matches as a list of FileLines values, one
// per file containing matches.
type FileLines struct {
	Filename string // name of the file
	Lines    []int  // line numbers within the file; sorted and free of duplicates when produced by LookupRegexp
}
  1008  
// LookupRegexp returns the number of matches and the matches where a regular
// expression r is found in the full text index. The matches are grouped by
// file: result holds one FileLines entry per file, ordered by filename, and
// each entry lists the sorted, duplicate-free line numbers of the matches in
// that file. Matches that span file boundaries are not reported.
//
// The search aims for n matches. If x has no full text index or n <= 0,
// found is 0 and result is nil.
//
// NOTE(review): the original comment promised "at most n matches are
// returned (thus found <= n)". A retry asks FindAllIndex for n1 > n matches
// and the loop only stops once found >= n, so found appears to be bounded by
// n1 rather than n after a retry - confirm.
func (x *Index) LookupRegexp(r *regexp.Regexp, n int) (found int, result []FileLines) {
	if x.suffixes == nil || n <= 0 {
		return
	}
	// n > 0

	var list positionList
	// FindAllIndex may return matches that span across file boundaries.
	// Such matches are unlikely, but after eliminating them we may end up
	// with fewer than n matches. If we don't have enough at the end, redo
	// the search with an increased value n1, but only if FindAllIndex
	// returned all the requested matches in the first place (if it
	// returned fewer than that there cannot be more).
	for n1 := n; found < n; n1 += n - found {
		// each attempt starts from scratch: found and list are rebuilt
		found = 0
		matches := x.suffixes.FindAllIndex(r, n1)
		// compute files, exclude matches that span file boundaries,
		// and record file name and line for each remaining match
		// (list is sized for all matches; only list[0:found] gets filled)
		list = make(positionList, len(matches))
		for _, m := range matches {
			// by construction, an offset corresponds to the Pos value
			// for the file set - use it to get the file and line
			p := token.Pos(m[0])
			if file := x.fset.File(p); file != nil {
				// file contains the match start m[0]; check that
				// the match end m[1] lies in the same file
				if base := file.Base(); base <= m[1] && m[1] <= base+file.Size() {
					// match [m[0], m[1]) is within the file boundaries
					list[found].filename = file.Name()
					list[found].line = file.Line(p)
					found++
				}
			}
		}
		if found == n || len(matches) < n1 {
			// found all matches or there's no chance to find more
			break
		}
	}
	// drop the unused entries left over from eliminated matches
	list = list[0:found]
	sort.Sort(list) // sort by filename

	// collect matches belonging to the same file
	var last string // filename of the group being collected
	var lines []int // line numbers collected for last
	// addLines appends the group collected so far (if any) to result
	// and starts a new, empty group
	addLines := func() {
		if len(lines) > 0 {
			// remove duplicate lines
			result = append(result, FileLines{last, unique(lines)})
			lines = nil
		}
	}
	for _, m := range list {
		if m.filename != last {
			// filename changed: flush the previous file's lines
			addLines()
			last = m.filename
		}
		lines = append(lines, m.line)
	}
	// flush the lines of the final file
	addLines()

	return
}