github.com/jhump/golang-x-tools@v0.0.0-20220218190644-4958d6d39439/internal/lsp/source/workspace_symbol.go (about)

     1  // Copyright 2020 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package source
     6  
     7  import (
     8  	"context"
     9  	"fmt"
    10  	"go/types"
    11  	"path/filepath"
    12  	"runtime"
    13  	"sort"
    14  	"strings"
    15  	"unicode"
    16  
    17  	"github.com/jhump/golang-x-tools/internal/event"
    18  	"github.com/jhump/golang-x-tools/internal/lsp/fuzzy"
    19  	"github.com/jhump/golang-x-tools/internal/lsp/protocol"
    20  	"github.com/jhump/golang-x-tools/internal/span"
    21  )
    22  
    23  // Symbol holds a precomputed symbol value. Note: we avoid using the
    24  // protocol.SymbolInformation struct here in order to reduce the size of each
    25  // symbol; only the name, kind and range are kept.
    26  type Symbol struct {
    27  	Name  string              // symbol name; matchFile counts each '.' in it as one nesting level
    28  	Kind  protocol.SymbolKind // kind copied into the reported result
    29  	Range protocol.Range      // range copied into the reported result's location
    30  }
    31  
    32  // maxSymbols defines the maximum number of symbol results that should ever be
    33  // sent in response to a client. It is also the fixed size of symbolStore.res.
    34  const maxSymbols = 100
    35  
    36  // WorkspaceSymbols matches symbols across all views using the given query,
    37  // according to the match semantics parameterized by matcherType and style.
    38  //
    39  // The workspace symbol method is defined in the spec as follows:
    40  //
    41  //   The workspace symbol request is sent from the client to the server to
    42  //   list project-wide symbols matching the query string.
    43  //
    44  // It is unclear what "project-wide" means here, but given the parameters of
    45  // workspace/symbol do not include any workspace identifier, then it has to be
    46  // assumed that "project-wide" means "across all workspaces".  Hence why
    47  // WorkspaceSymbols receives the views []View.
    48  //
    49  // However, it then becomes unclear what it would mean to call WorkspaceSymbols
    50  // with a different configured SymbolMatcher per View. Therefore we assume that
    51  // Session level configuration will define the SymbolMatcher to be used for the
    52  // WorkspaceSymbols method.
    53  func WorkspaceSymbols(ctx context.Context, matcherType SymbolMatcher, style SymbolStyle, views []View, query string) ([]protocol.SymbolInformation, error) {
    54  	ctx, done := event.Start(ctx, "source.WorkspaceSymbols")
    55  	defer done()
    56  	if query == "" {
    57  		return nil, nil
    58  	}
    59  	sc := newSymbolCollector(matcherType, style, query)
    60  	return sc.walk(ctx, views)
    61  }
    62  
    63  // A matcherFunc returns the index and score of a symbol match against the
    64  // symbol formed by chunks. The exact, prefix and suffix matchers in this file
    65  // report index -1 and score 0 on no match. See symbolCollector for more information.
    66  type matcherFunc func(chunks []string) (int, float64)
    67  
    68  // A symbolizer returns the best symbol match for a name with pkg, according to
    69  // some heuristic. The symbol name is passed as the single string name, for
    70  // example "myType.field". The result is the chosen symbol as a slice of
    71  // pieces that callers join (e.g. []string{"pkg", ".", "myType.field"}), plus its score.
    72  //
    73  // See the comment for symbolCollector for more information.
    74  type symbolizer func(name string, pkg Metadata, m matcherFunc) ([]string, float64)
    75  
    76  func fullyQualifiedSymbolMatch(name string, pkg Metadata, matcher matcherFunc) ([]string, float64) {
    77  	_, score := dynamicSymbolMatch(name, pkg, matcher)
    78  	if score > 0 {
    79  		return []string{pkg.PackagePath(), ".", name}, score
    80  	}
    81  	return nil, 0
    82  }
    83  
    84  func dynamicSymbolMatch(name string, pkg Metadata, matcher matcherFunc) ([]string, float64) {
    85  	var score float64
    86  
    87  	endsInPkgName := strings.HasSuffix(pkg.PackagePath(), pkg.PackageName())
    88  
    89  	// If the package path does not end in the package name, we need to check the
    90  	// package-qualified symbol as an extra pass first.
    91  	if !endsInPkgName {
    92  		pkgQualified := []string{pkg.PackageName(), ".", name}
    93  		idx, score := matcher(pkgQualified)
    94  		nameStart := len(pkg.PackageName()) + 1
    95  		if score > 0 {
    96  			// If our match is contained entirely within the unqualified portion,
    97  			// just return that.
    98  			if idx >= nameStart {
    99  				return []string{name}, score
   100  			}
   101  			// Lower the score for matches that include the package name.
   102  			return pkgQualified, score * 0.8
   103  		}
   104  	}
   105  
   106  	// Now try matching the fully qualified symbol.
   107  	fullyQualified := []string{pkg.PackagePath(), ".", name}
   108  	idx, score := matcher(fullyQualified)
   109  
   110  	// As above, check if we matched just the unqualified symbol name.
   111  	nameStart := len(pkg.PackagePath()) + 1
   112  	if idx >= nameStart {
   113  		return []string{name}, score
   114  	}
   115  
   116  	// If our package path ends in the package name, we'll have skipped the
   117  	// initial pass above, so check if we matched just the package-qualified
   118  	// name.
   119  	if endsInPkgName && idx >= 0 {
   120  		pkgStart := len(pkg.PackagePath()) - len(pkg.PackageName())
   121  		if idx >= pkgStart {
   122  			return []string{pkg.PackageName(), ".", name}, score
   123  		}
   124  	}
   125  
   126  	// Our match was not contained within the unqualified or package qualified
   127  	// symbol. Return the fully qualified symbol but discount the score.
   128  	return fullyQualified, score * 0.6
   129  }
   130  
   131  func packageSymbolMatch(name string, pkg Metadata, matcher matcherFunc) ([]string, float64) {
   132  	qualified := []string{pkg.PackageName(), ".", name}
   133  	if _, s := matcher(qualified); s > 0 {
   134  		return qualified, s
   135  	}
   136  	return nil, 0
   137  }
   138  
   139  // symbolCollector holds context as we walk Packages, gathering symbols that
   140  // match a given query.
   141  //
   142  // How we match symbols is parameterized by two function types:
   143  //  * A matcherFunc determines how well a string symbol matches a query. It
   144  //    returns a non-negative score indicating the quality of the match. A score
   145  //    of zero indicates no match.
   146  //  * A symbolizer determines how we extract the symbol for an object. This
   147  //    enables the 'symbolStyle' configuration option.
   148  type symbolCollector struct {
   149  	// These types parameterize the symbol-matching pass. matchers holds one matcher per worker goroutine started by walk.
   150  	matchers   []matcherFunc
   151  	symbolizer symbolizer
   152  
   153  	seen map[span.URI]bool // NOTE(review): not referenced by the code visible here; confirm it is still used
   154  	symbolStore            // merged results; walk stores every worker's results here
   155  }
   156  
   157  func newSymbolCollector(matcher SymbolMatcher, style SymbolStyle, query string) *symbolCollector {
   158  	var s symbolizer
   159  	switch style {
   160  	case DynamicSymbols:
   161  		s = dynamicSymbolMatch
   162  	case FullyQualifiedSymbols:
   163  		s = fullyQualifiedSymbolMatch
   164  	case PackageQualifiedSymbols:
   165  		s = packageSymbolMatch
   166  	default:
   167  		panic(fmt.Errorf("unknown symbol style: %v", style))
   168  	}
   169  	sc := &symbolCollector{symbolizer: s}
   170  	sc.matchers = make([]matcherFunc, runtime.GOMAXPROCS(-1))
   171  	for i := range sc.matchers {
   172  		sc.matchers[i] = buildMatcher(matcher, query)
   173  	}
   174  	return sc
   175  }
   176  
   177  func buildMatcher(matcher SymbolMatcher, query string) matcherFunc {
   178  	switch matcher {
   179  	case SymbolFuzzy:
   180  		return parseQuery(query, newFuzzyMatcher)
   181  	case SymbolFastFuzzy:
   182  		return parseQuery(query, func(query string) matcherFunc {
   183  			return fuzzy.NewSymbolMatcher(query).Match
   184  		})
   185  	case SymbolCaseSensitive:
   186  		return matchExact(query)
   187  	case SymbolCaseInsensitive:
   188  		q := strings.ToLower(query)
   189  		exact := matchExact(q)
   190  		wrapper := []string{""}
   191  		return func(chunks []string) (int, float64) {
   192  			s := strings.Join(chunks, "")
   193  			wrapper[0] = strings.ToLower(s)
   194  			return exact(wrapper)
   195  		}
   196  	}
   197  	panic(fmt.Errorf("unknown symbol matcher: %v", matcher))
   198  }
   199  
   200  func newFuzzyMatcher(query string) matcherFunc {
   201  	fm := fuzzy.NewMatcher(query)
   202  	return func(chunks []string) (int, float64) {
   203  		score := float64(fm.ScoreChunks(chunks))
   204  		ranges := fm.MatchedRanges()
   205  		if len(ranges) > 0 {
   206  			return ranges[0], score
   207  		}
   208  		return -1, score
   209  	}
   210  }
   211  
   212  // parseQuery parses a field-separated symbol query, extracting the special
   213  // characters listed below, and returns a matcherFunc corresponding to the AND
   214  // of all field queries.
   215  //
   216  // Special characters:
   217  //   ^  match exact prefix
   218  //   $  match exact suffix
   219  //   '  match exact
   220  //
   221  // In all three of these special queries, matches are 'smart-cased', meaning
   222  // they are case sensitive if the symbol query contains any upper-case
   223  // characters, and case insensitive otherwise.
   224  func parseQuery(q string, newMatcher func(string) matcherFunc) matcherFunc {
   225  	fields := strings.Fields(q)
   226  	if len(fields) == 0 {
   227  		return func([]string) (int, float64) { return -1, 0 }
   228  	}
   229  	var funcs []matcherFunc
   230  	for _, field := range fields {
   231  		var f matcherFunc
   232  		switch {
   233  		case strings.HasPrefix(field, "^"):
   234  			prefix := field[1:]
   235  			f = smartCase(prefix, func(chunks []string) (int, float64) {
   236  				s := strings.Join(chunks, "")
   237  				if strings.HasPrefix(s, prefix) {
   238  					return 0, 1
   239  				}
   240  				return -1, 0
   241  			})
   242  		case strings.HasPrefix(field, "'"):
   243  			exact := field[1:]
   244  			f = smartCase(exact, matchExact(exact))
   245  		case strings.HasSuffix(field, "$"):
   246  			suffix := field[0 : len(field)-1]
   247  			f = smartCase(suffix, func(chunks []string) (int, float64) {
   248  				s := strings.Join(chunks, "")
   249  				if strings.HasSuffix(s, suffix) {
   250  					return len(s) - len(suffix), 1
   251  				}
   252  				return -1, 0
   253  			})
   254  		default:
   255  			f = newMatcher(field)
   256  		}
   257  		funcs = append(funcs, f)
   258  	}
   259  	if len(funcs) == 1 {
   260  		return funcs[0]
   261  	}
   262  	return comboMatcher(funcs).match
   263  }
   264  
   265  func matchExact(exact string) matcherFunc {
   266  	return func(chunks []string) (int, float64) {
   267  		s := strings.Join(chunks, "")
   268  		if idx := strings.LastIndex(s, exact); idx >= 0 {
   269  			return idx, 1
   270  		}
   271  		return -1, 0
   272  	}
   273  }
   274  
   275  // smartCase returns a matcherFunc that is case-sensitive if q contains any
   276  // upper-case characters, and case-insensitive otherwise.
   277  func smartCase(q string, m matcherFunc) matcherFunc {
   278  	insensitive := strings.ToLower(q) == q
   279  	wrapper := []string{""}
   280  	return func(chunks []string) (int, float64) {
   281  		s := strings.Join(chunks, "")
   282  		if insensitive {
   283  			s = strings.ToLower(s)
   284  		}
   285  		wrapper[0] = s
   286  		return m(wrapper)
   287  	}
   288  }
   289  
   290  type comboMatcher []matcherFunc
   291  
   292  func (c comboMatcher) match(chunks []string) (int, float64) {
   293  	score := 1.0
   294  	first := 0
   295  	for _, f := range c {
   296  		idx, s := f(chunks)
   297  		if idx < first {
   298  			first = idx
   299  		}
   300  		score *= s
   301  	}
   302  	return first, score
   303  }
   304  
   305  func (sc *symbolCollector) walk(ctx context.Context, views []View) ([]protocol.SymbolInformation, error) {
   306  	// Use the root view URIs for determining (lexically) whether a uri is in any
   307  	// open workspace.
   308  	var roots []string
   309  	for _, v := range views {
   310  		roots = append(roots, strings.TrimRight(string(v.Folder()), "/"))
   311  	}
   312  
   313  	results := make(chan *symbolStore)
   314  	matcherlen := len(sc.matchers)
   315  	files := make(map[span.URI]symbolFile)
   316  
   317  	for _, v := range views {
   318  		snapshot, release := v.Snapshot(ctx)
   319  		defer release()
   320  		psyms, err := snapshot.Symbols(ctx)
   321  		if err != nil {
   322  			return nil, err
   323  		}
   324  
   325  		filters := v.Options().DirectoryFilters
   326  		folder := filepath.ToSlash(v.Folder().Filename())
   327  		for uri, syms := range psyms {
   328  			norm := filepath.ToSlash(uri.Filename())
   329  			nm := strings.TrimPrefix(norm, folder)
   330  			if FiltersDisallow(nm, filters) {
   331  				continue
   332  			}
   333  			// Only scan each file once.
   334  			if _, ok := files[uri]; ok {
   335  				continue
   336  			}
   337  			mds, err := snapshot.MetadataForFile(ctx, uri)
   338  			if err != nil {
   339  				event.Error(ctx, fmt.Sprintf("missing metadata for %q", uri), err)
   340  				continue
   341  			}
   342  			if len(mds) == 0 {
   343  				// TODO: should use the bug reporting API
   344  				continue
   345  			}
   346  			files[uri] = symbolFile{uri, mds[0], syms}
   347  		}
   348  	}
   349  
   350  	var work []symbolFile
   351  	for _, f := range files {
   352  		work = append(work, f)
   353  	}
   354  
   355  	// Compute matches concurrently. Each symbolWorker has its own symbolStore,
   356  	// which we merge at the end.
   357  	for i, matcher := range sc.matchers {
   358  		go func(i int, matcher matcherFunc) {
   359  			w := &symbolWorker{
   360  				symbolizer: sc.symbolizer,
   361  				matcher:    matcher,
   362  				ss:         &symbolStore{},
   363  				roots:      roots,
   364  			}
   365  			for j := i; j < len(work); j += matcherlen {
   366  				w.matchFile(work[j])
   367  			}
   368  			results <- w.ss
   369  		}(i, matcher)
   370  	}
   371  
   372  	for i := 0; i < matcherlen; i++ {
   373  		ss := <-results
   374  		for _, si := range ss.res {
   375  			sc.store(si)
   376  		}
   377  	}
   378  	return sc.results(), nil
   379  }
   380  
   // FiltersDisallow reports whether path is excluded by filters.
   //
   // Each filter is an operator character followed by a directory prefix. The
   // filters are applied in order and the last one that applies to path decides:
   // path is excluded if that filter's operator is '-', and included otherwise.
   // A filter with an empty prefix applies to every path; a non-empty prefix
   // applies only when it names path itself or one of its parent directories
   // (whole path elements, not arbitrary string prefixes). A single leading "/"
   // on path is ignored. Empty filter strings are skipped. If no filter applies,
   // path is not excluded.
   //
   // This is code from the body of cache.pathExcludedByFilter in cache/view.go.
   // Exporting and using that function would cause an import cycle.
   // Moving it here and exporting it would leave behind view_test.go.
   // (This code is exported and used in the body of cache.pathExcludedByFilter)
   func FiltersDisallow(path string, filters []string) bool {
   	path = strings.TrimPrefix(path, "/")
   	// Non-empty prefixes have to be precise directory matches, so compare
   	// "<prefix>/" against "<path>/". The latter is computed once: appending the
   	// slash inside the loop would grow path with every filter and make the
   	// outcome depend on how many filters came before.
   	dirPath := path + "/"

   	excluded := false
   	for _, filter := range filters {
   		if filter == "" {
   			// No operator to read; ignore rather than index out of range.
   			continue
   		}
   		op, prefix := filter[0], filter[1:]
   		if prefix != "" && !strings.HasPrefix(dirPath, prefix+"/") {
   			continue
   		}
   		excluded = op == '-'
   	}
   	return excluded
   }
   402  
   403  // symbolFile holds symbol information for a single file; it is the unit of work handed to symbolWorker.matchFile.
   404  type symbolFile struct {
   405  	uri  span.URI // file the symbols belong to
   406  	md   Metadata // first metadata entry returned by MetadataForFile for uri
   407  	syms []Symbol // symbols reported for uri by snapshot.Symbols
   408  }
   409  
   410  // symbolWorker matches symbols and captures the highest scoring results. walk creates one per matcher, each in its own goroutine.
   411  type symbolWorker struct {
   412  	symbolizer symbolizer   // chooses how each matching symbol is rendered
   413  	matcher    matcherFunc  // this worker's own matcher, not shared with other workers
   414  	ss         *symbolStore // this worker's results; merged by walk when the worker finishes
   415  	roots      []string     // view folder URIs with trailing "/" trimmed; used as lexical prefixes
   416  }
   417  
   418  func (w *symbolWorker) matchFile(i symbolFile) {
   419  	for _, sym := range i.syms {
   420  		symbolParts, score := w.symbolizer(sym.Name, i.md, w.matcher)
   421  
   422  		// Check if the score is too low before applying any downranking.
   423  		if w.ss.tooLow(score) {
   424  			continue
   425  		}
   426  
   427  		// Factors to apply to the match score for the purpose of downranking
   428  		// results.
   429  		//
   430  		// These numbers were crudely calibrated based on trial-and-error using a
   431  		// small number of sample queries. Adjust as necessary.
   432  		//
   433  		// All factors are multiplicative, meaning if more than one applies they are
   434  		// multiplied together.
   435  		const (
   436  			// nonWorkspaceFactor is applied to symbols outside of any active
   437  			// workspace. Developers are less likely to want to jump to code that they
   438  			// are not actively working on.
   439  			nonWorkspaceFactor = 0.5
   440  			// nonWorkspaceUnexportedFactor is applied to unexported symbols outside of
   441  			// any active workspace. Since one wouldn't usually jump to unexported
   442  			// symbols to understand a package API, they are particularly irrelevant.
   443  			nonWorkspaceUnexportedFactor = 0.5
   444  			// every field or method nesting level to access the field decreases
   445  			// the score by a factor of 1.0 - depth*depthFactor, up to a depth of
   446  			// 3.
   447  			depthFactor = 0.2
   448  		)
   449  
   450  		startWord := true
   451  		exported := true
   452  		depth := 0.0
   453  		for _, r := range sym.Name {
   454  			if startWord && !unicode.IsUpper(r) {
   455  				exported = false
   456  			}
   457  			if r == '.' {
   458  				startWord = true
   459  				depth++
   460  			} else {
   461  				startWord = false
   462  			}
   463  		}
   464  
   465  		inWorkspace := false
   466  		for _, root := range w.roots {
   467  			if strings.HasPrefix(string(i.uri), root) {
   468  				inWorkspace = true
   469  				break
   470  			}
   471  		}
   472  
   473  		// Apply downranking based on workspace position.
   474  		if !inWorkspace {
   475  			score *= nonWorkspaceFactor
   476  			if !exported {
   477  				score *= nonWorkspaceUnexportedFactor
   478  			}
   479  		}
   480  
   481  		// Apply downranking based on symbol depth.
   482  		if depth > 3 {
   483  			depth = 3
   484  		}
   485  		score *= 1.0 - depth*depthFactor
   486  
   487  		if w.ss.tooLow(score) {
   488  			continue
   489  		}
   490  
   491  		si := symbolInformation{
   492  			score:     score,
   493  			symbol:    strings.Join(symbolParts, ""),
   494  			kind:      sym.Kind,
   495  			uri:       i.uri,
   496  			rng:       sym.Range,
   497  			container: i.md.PackagePath(),
   498  		}
   499  		w.ss.store(si)
   500  	}
   501  }
   502  
   503  type symbolStore struct {
   504  	res [maxSymbols]symbolInformation
   505  }
   506  
   507  // store inserts si into the sorted results, if si has a high enough score.
   508  func (sc *symbolStore) store(si symbolInformation) {
   509  	if sc.tooLow(si.score) {
   510  		return
   511  	}
   512  	insertAt := sort.Search(len(sc.res), func(i int) bool {
   513  		// Sort by score, then symbol length, and finally lexically.
   514  		if sc.res[i].score != si.score {
   515  			return sc.res[i].score < si.score
   516  		}
   517  		if len(sc.res[i].symbol) != len(si.symbol) {
   518  			return len(sc.res[i].symbol) > len(si.symbol)
   519  		}
   520  		return sc.res[i].symbol > si.symbol
   521  	})
   522  	if insertAt < len(sc.res)-1 {
   523  		copy(sc.res[insertAt+1:], sc.res[insertAt:len(sc.res)-1])
   524  	}
   525  	sc.res[insertAt] = si
   526  }
   527  
   528  func (sc *symbolStore) tooLow(score float64) bool {
   529  	return score <= sc.res[len(sc.res)-1].score
   530  }
   531  
   532  func (sc *symbolStore) results() []protocol.SymbolInformation {
   533  	var res []protocol.SymbolInformation
   534  	for _, si := range sc.res {
   535  		if si.score <= 0 {
   536  			return res
   537  		}
   538  		res = append(res, si.asProtocolSymbolInformation())
   539  	}
   540  	return res
   541  }
   542  
   543  func typeToKind(typ types.Type) protocol.SymbolKind {
   544  	switch typ := typ.Underlying().(type) {
   545  	case *types.Interface:
   546  		return protocol.Interface
   547  	case *types.Struct:
   548  		return protocol.Struct
   549  	case *types.Signature:
   550  		if typ.Recv() != nil {
   551  			return protocol.Method
   552  		}
   553  		return protocol.Function
   554  	case *types.Named:
   555  		return typeToKind(typ.Underlying())
   556  	case *types.Basic:
   557  		i := typ.Info()
   558  		switch {
   559  		case i&types.IsNumeric != 0:
   560  			return protocol.Number
   561  		case i&types.IsBoolean != 0:
   562  			return protocol.Boolean
   563  		case i&types.IsString != 0:
   564  			return protocol.String
   565  		}
   566  	}
   567  	return protocol.Variable
   568  }
   569  
   570  // symbolInformation is a cut-down version of protocol.SymbolInformation that
   571  // allows struct values of this type to be used as map keys. It also carries the match score.
   572  type symbolInformation struct {
   573  	score     float64             // match score after downranking; orders entries in symbolStore
   574  	symbol    string              // rendered symbol name (the joined symbolizer output)
   575  	container string              // package path taken from the file's metadata
   576  	kind      protocol.SymbolKind // copied from Symbol.Kind
   577  	uri       span.URI            // file containing the symbol
   578  	rng       protocol.Range      // copied from Symbol.Range
   579  }
   580  
   581  // asProtocolSymbolInformation converts s to a protocol.SymbolInformation value.
   582  //
   583  // TODO: work out how to handle tags if/when they are needed.
   584  func (s symbolInformation) asProtocolSymbolInformation() protocol.SymbolInformation {
   585  	return protocol.SymbolInformation{
   586  		Name: s.symbol,
   587  		Kind: s.kind,
   588  		Location: protocol.Location{
   589  			URI:   protocol.URIFromSpanURI(s.uri),
   590  			Range: s.rng,
   591  		},
   592  		ContainerName: s.container,
   593  	}
   594  }