golang.org/x/tools/gopls@v0.15.3/internal/golang/workspace_symbol.go (about)

     1  // Copyright 2020 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package golang
     6  
     7  import (
     8  	"context"
     9  	"fmt"
    10  	"path/filepath"
    11  	"runtime"
    12  	"sort"
    13  	"strings"
    14  	"unicode"
    15  
    16  	"golang.org/x/tools/gopls/internal/cache"
    17  	"golang.org/x/tools/gopls/internal/cache/metadata"
    18  	"golang.org/x/tools/gopls/internal/protocol"
    19  	"golang.org/x/tools/gopls/internal/settings"
    20  	"golang.org/x/tools/internal/event"
    21  	"golang.org/x/tools/internal/fuzzy"
    22  )
    23  
// maxSymbols defines the maximum number of symbol results that should ever be
// sent in response to a client. It is also the fixed capacity of every
// symbolStore, so it bounds the number of candidates retained while matching.
const maxSymbols = 100
    27  
    28  // WorkspaceSymbols matches symbols across all views using the given query,
    29  // according to the match semantics parameterized by matcherType and style.
    30  //
    31  // The workspace symbol method is defined in the spec as follows:
    32  //
    33  //	The workspace symbol request is sent from the client to the server to
    34  //	list project-wide symbols matching the query string.
    35  //
    36  // It is unclear what "project-wide" means here, but given the parameters of
    37  // workspace/symbol do not include any workspace identifier, then it has to be
    38  // assumed that "project-wide" means "across all workspaces".  Hence why
    39  // WorkspaceSymbols receives the views []View.
    40  //
    41  // However, it then becomes unclear what it would mean to call WorkspaceSymbols
    42  // with a different configured SymbolMatcher per View. Therefore we assume that
    43  // Session level configuration will define the SymbolMatcher to be used for the
    44  // WorkspaceSymbols method.
    45  func WorkspaceSymbols(ctx context.Context, matcher settings.SymbolMatcher, style settings.SymbolStyle, snapshots []*cache.Snapshot, query string) ([]protocol.SymbolInformation, error) {
    46  	ctx, done := event.Start(ctx, "golang.WorkspaceSymbols")
    47  	defer done()
    48  	if query == "" {
    49  		return nil, nil
    50  	}
    51  
    52  	var s symbolizer
    53  	switch style {
    54  	case settings.DynamicSymbols:
    55  		s = dynamicSymbolMatch
    56  	case settings.FullyQualifiedSymbols:
    57  		s = fullyQualifiedSymbolMatch
    58  	case settings.PackageQualifiedSymbols:
    59  		s = packageSymbolMatch
    60  	default:
    61  		panic(fmt.Errorf("unknown symbol style: %v", style))
    62  	}
    63  
    64  	return collectSymbols(ctx, snapshots, matcher, s, query)
    65  }
    66  
// A matcherFunc returns the index and score of a symbol match. The symbol is
// supplied as a sequence of string chunks.
//
// Callers in this file treat the index as an offset into the concatenation of
// the chunks (see dynamicSymbolMatch), and the matchers defined in this file
// use an index of -1 when they have no match position to report.
//
// See the comment for collectSymbols for more information.
type matcherFunc func(chunks []string) (int, float64)
    71  
// A symbolizer returns the best symbol match for a name with pkg, according to
// some heuristic. The result is the matched symbol expressed as a slice of
// string pieces, for example []string{"pkg", ".", "myType.field"} or just
// []string{"myType.field"}, together with the score of the match. Callers
// join the pieces to obtain the symbol text.
//
// See the comment for collectSymbols for more information.
//
// The space argument is an empty slice with spare capacity that may be used
// to allocate the result.
type symbolizer func(space []string, name string, pkg *metadata.Package, m matcherFunc) ([]string, float64)
    82  
    83  func fullyQualifiedSymbolMatch(space []string, name string, pkg *metadata.Package, matcher matcherFunc) ([]string, float64) {
    84  	if _, score := dynamicSymbolMatch(space, name, pkg, matcher); score > 0 {
    85  		return append(space, string(pkg.PkgPath), ".", name), score
    86  	}
    87  	return nil, 0
    88  }
    89  
    90  func dynamicSymbolMatch(space []string, name string, pkg *metadata.Package, matcher matcherFunc) ([]string, float64) {
    91  	if metadata.IsCommandLineArguments(pkg.ID) {
    92  		// command-line-arguments packages have a non-sensical package path, so
    93  		// just use their package name.
    94  		return packageSymbolMatch(space, name, pkg, matcher)
    95  	}
    96  
    97  	var score float64
    98  
    99  	endsInPkgName := strings.HasSuffix(string(pkg.PkgPath), string(pkg.Name))
   100  
   101  	// If the package path does not end in the package name, we need to check the
   102  	// package-qualified symbol as an extra pass first.
   103  	if !endsInPkgName {
   104  		pkgQualified := append(space, string(pkg.Name), ".", name)
   105  		idx, score := matcher(pkgQualified)
   106  		nameStart := len(pkg.Name) + 1
   107  		if score > 0 {
   108  			// If our match is contained entirely within the unqualified portion,
   109  			// just return that.
   110  			if idx >= nameStart {
   111  				return append(space, name), score
   112  			}
   113  			// Lower the score for matches that include the package name.
   114  			return pkgQualified, score * 0.8
   115  		}
   116  	}
   117  
   118  	// Now try matching the fully qualified symbol.
   119  	fullyQualified := append(space, string(pkg.PkgPath), ".", name)
   120  	idx, score := matcher(fullyQualified)
   121  
   122  	// As above, check if we matched just the unqualified symbol name.
   123  	nameStart := len(pkg.PkgPath) + 1
   124  	if idx >= nameStart {
   125  		return append(space, name), score
   126  	}
   127  
   128  	// If our package path ends in the package name, we'll have skipped the
   129  	// initial pass above, so check if we matched just the package-qualified
   130  	// name.
   131  	if endsInPkgName && idx >= 0 {
   132  		pkgStart := len(pkg.PkgPath) - len(pkg.Name)
   133  		if idx >= pkgStart {
   134  			return append(space, string(pkg.Name), ".", name), score
   135  		}
   136  	}
   137  
   138  	// Our match was not contained within the unqualified or package qualified
   139  	// symbol. Return the fully qualified symbol but discount the score.
   140  	return fullyQualified, score * 0.6
   141  }
   142  
   143  func packageSymbolMatch(space []string, name string, pkg *metadata.Package, matcher matcherFunc) ([]string, float64) {
   144  	qualified := append(space, string(pkg.Name), ".", name)
   145  	if _, s := matcher(qualified); s > 0 {
   146  		return qualified, s
   147  	}
   148  	return nil, 0
   149  }
   150  
   151  func buildMatcher(matcher settings.SymbolMatcher, query string) matcherFunc {
   152  	switch matcher {
   153  	case settings.SymbolFuzzy:
   154  		return parseQuery(query, newFuzzyMatcher)
   155  	case settings.SymbolFastFuzzy:
   156  		return parseQuery(query, func(query string) matcherFunc {
   157  			return fuzzy.NewSymbolMatcher(query).Match
   158  		})
   159  	case settings.SymbolCaseSensitive:
   160  		return matchExact(query)
   161  	case settings.SymbolCaseInsensitive:
   162  		q := strings.ToLower(query)
   163  		exact := matchExact(q)
   164  		wrapper := []string{""}
   165  		return func(chunks []string) (int, float64) {
   166  			s := strings.Join(chunks, "")
   167  			wrapper[0] = strings.ToLower(s)
   168  			return exact(wrapper)
   169  		}
   170  	}
   171  	panic(fmt.Errorf("unknown symbol matcher: %v", matcher))
   172  }
   173  
   174  func newFuzzyMatcher(query string) matcherFunc {
   175  	fm := fuzzy.NewMatcher(query)
   176  	return func(chunks []string) (int, float64) {
   177  		score := float64(fm.ScoreChunks(chunks))
   178  		ranges := fm.MatchedRanges()
   179  		if len(ranges) > 0 {
   180  			return ranges[0], score
   181  		}
   182  		return -1, score
   183  	}
   184  }
   185  
   186  // parseQuery parses a field-separated symbol query, extracting the special
   187  // characters listed below, and returns a matcherFunc corresponding to the AND
   188  // of all field queries.
   189  //
   190  // Special characters:
   191  //
   192  //	^  match exact prefix
   193  //	$  match exact suffix
   194  //	'  match exact
   195  //
   196  // In all three of these special queries, matches are 'smart-cased', meaning
   197  // they are case sensitive if the symbol query contains any upper-case
   198  // characters, and case insensitive otherwise.
   199  func parseQuery(q string, newMatcher func(string) matcherFunc) matcherFunc {
   200  	fields := strings.Fields(q)
   201  	if len(fields) == 0 {
   202  		return func([]string) (int, float64) { return -1, 0 }
   203  	}
   204  	var funcs []matcherFunc
   205  	for _, field := range fields {
   206  		var f matcherFunc
   207  		switch {
   208  		case strings.HasPrefix(field, "^"):
   209  			prefix := field[1:]
   210  			f = smartCase(prefix, func(chunks []string) (int, float64) {
   211  				s := strings.Join(chunks, "")
   212  				if strings.HasPrefix(s, prefix) {
   213  					return 0, 1
   214  				}
   215  				return -1, 0
   216  			})
   217  		case strings.HasPrefix(field, "'"):
   218  			exact := field[1:]
   219  			f = smartCase(exact, matchExact(exact))
   220  		case strings.HasSuffix(field, "$"):
   221  			suffix := field[0 : len(field)-1]
   222  			f = smartCase(suffix, func(chunks []string) (int, float64) {
   223  				s := strings.Join(chunks, "")
   224  				if strings.HasSuffix(s, suffix) {
   225  					return len(s) - len(suffix), 1
   226  				}
   227  				return -1, 0
   228  			})
   229  		default:
   230  			f = newMatcher(field)
   231  		}
   232  		funcs = append(funcs, f)
   233  	}
   234  	if len(funcs) == 1 {
   235  		return funcs[0]
   236  	}
   237  	return comboMatcher(funcs).match
   238  }
   239  
   240  func matchExact(exact string) matcherFunc {
   241  	return func(chunks []string) (int, float64) {
   242  		s := strings.Join(chunks, "")
   243  		if idx := strings.LastIndex(s, exact); idx >= 0 {
   244  			return idx, 1
   245  		}
   246  		return -1, 0
   247  	}
   248  }
   249  
   250  // smartCase returns a matcherFunc that is case-sensitive if q contains any
   251  // upper-case characters, and case-insensitive otherwise.
   252  func smartCase(q string, m matcherFunc) matcherFunc {
   253  	insensitive := strings.ToLower(q) == q
   254  	wrapper := []string{""}
   255  	return func(chunks []string) (int, float64) {
   256  		s := strings.Join(chunks, "")
   257  		if insensitive {
   258  			s = strings.ToLower(s)
   259  		}
   260  		wrapper[0] = s
   261  		return m(wrapper)
   262  	}
   263  }
   264  
   265  type comboMatcher []matcherFunc
   266  
   267  func (c comboMatcher) match(chunks []string) (int, float64) {
   268  	score := 1.0
   269  	first := 0
   270  	for _, f := range c {
   271  		idx, s := f(chunks)
   272  		if idx < first {
   273  			first = idx
   274  		}
   275  		score *= s
   276  	}
   277  	return first, score
   278  }
   279  
   280  // collectSymbols calls snapshot.Symbols to walk the syntax trees of
   281  // all files in the views' current snapshots, and returns a sorted,
   282  // scored list of symbols that best match the parameters.
   283  //
   284  // How it matches symbols is parameterized by two interfaces:
   285  //   - A matcherFunc determines how well a string symbol matches a query. It
   286  //     returns a non-negative score indicating the quality of the match. A score
   287  //     of zero indicates no match.
   288  //   - A symbolizer determines how we extract the symbol for an object. This
   289  //     enables the 'symbolStyle' configuration option.
   290  func collectSymbols(ctx context.Context, snapshots []*cache.Snapshot, matcherType settings.SymbolMatcher, symbolizer symbolizer, query string) ([]protocol.SymbolInformation, error) {
   291  	// Extract symbols from all files.
   292  	var work []symbolFile
   293  	var roots []string
   294  	seen := make(map[protocol.DocumentURI]bool)
   295  	// TODO(adonovan): opt: parallelize this loop? How often is len > 1?
   296  	for _, snapshot := range snapshots {
   297  		// Use the root view URIs for determining (lexically)
   298  		// whether a URI is in any open workspace.
   299  		folderURI := snapshot.Folder()
   300  		roots = append(roots, strings.TrimRight(string(folderURI), "/"))
   301  
   302  		filters := snapshot.Options().DirectoryFilters
   303  		filterer := cache.NewFilterer(filters)
   304  		folder := filepath.ToSlash(folderURI.Path())
   305  
   306  		workspaceOnly := true
   307  		if snapshot.Options().SymbolScope == settings.AllSymbolScope {
   308  			workspaceOnly = false
   309  		}
   310  		symbols, err := snapshot.Symbols(ctx, workspaceOnly)
   311  		if err != nil {
   312  			return nil, err
   313  		}
   314  
   315  		for uri, syms := range symbols {
   316  			norm := filepath.ToSlash(uri.Path())
   317  			nm := strings.TrimPrefix(norm, folder)
   318  			if filterer.Disallow(nm) {
   319  				continue
   320  			}
   321  			// Only scan each file once.
   322  			if seen[uri] {
   323  				continue
   324  			}
   325  			meta, err := NarrowestMetadataForFile(ctx, snapshot, uri)
   326  			if err != nil {
   327  				event.Error(ctx, fmt.Sprintf("missing metadata for %q", uri), err)
   328  				continue
   329  			}
   330  			seen[uri] = true
   331  			work = append(work, symbolFile{uri, meta, syms})
   332  		}
   333  	}
   334  
   335  	// Match symbols in parallel.
   336  	// Each worker has its own symbolStore,
   337  	// which we merge at the end.
   338  	nmatchers := runtime.GOMAXPROCS(-1) // matching is CPU bound
   339  	results := make(chan *symbolStore)
   340  	for i := 0; i < nmatchers; i++ {
   341  		go func(i int) {
   342  			matcher := buildMatcher(matcherType, query)
   343  			store := new(symbolStore)
   344  			// Assign files to workers in round-robin fashion.
   345  			for j := i; j < len(work); j += nmatchers {
   346  				matchFile(store, symbolizer, matcher, roots, work[j])
   347  			}
   348  			results <- store
   349  		}(i)
   350  	}
   351  
   352  	// Gather and merge results as they arrive.
   353  	var unified symbolStore
   354  	for i := 0; i < nmatchers; i++ {
   355  		store := <-results
   356  		for _, syms := range store.res {
   357  			unified.store(syms)
   358  		}
   359  	}
   360  	return unified.results(), nil
   361  }
   362  
// symbolFile holds symbol information for a single file. It is the unit of
// work handed to matchFile.
type symbolFile struct {
	uri  protocol.DocumentURI // URI of the file
	mp   *metadata.Package    // package metadata chosen for the file by collectSymbols
	syms []cache.Symbol       // symbols of the file, as reported by snapshot.Symbols
}
   369  
   370  // matchFile scans a symbol file and adds matching symbols to the store.
   371  func matchFile(store *symbolStore, symbolizer symbolizer, matcher matcherFunc, roots []string, i symbolFile) {
   372  	space := make([]string, 0, 3)
   373  	for _, sym := range i.syms {
   374  		symbolParts, score := symbolizer(space, sym.Name, i.mp, matcher)
   375  
   376  		// Check if the score is too low before applying any downranking.
   377  		if store.tooLow(score) {
   378  			continue
   379  		}
   380  
   381  		// Factors to apply to the match score for the purpose of downranking
   382  		// results.
   383  		//
   384  		// These numbers were crudely calibrated based on trial-and-error using a
   385  		// small number of sample queries. Adjust as necessary.
   386  		//
   387  		// All factors are multiplicative, meaning if more than one applies they are
   388  		// multiplied together.
   389  		const (
   390  			// nonWorkspaceFactor is applied to symbols outside the workspace.
   391  			// Developers are less likely to want to jump to code that they
   392  			// are not actively working on.
   393  			nonWorkspaceFactor = 0.5
   394  			// nonWorkspaceUnexportedFactor is applied to unexported symbols outside
   395  			// the workspace. Since one wouldn't usually jump to unexported
   396  			// symbols to understand a package API, they are particularly irrelevant.
   397  			nonWorkspaceUnexportedFactor = 0.5
   398  			// every field or method nesting level to access the field decreases
   399  			// the score by a factor of 1.0 - depth*depthFactor, up to a depth of
   400  			// 3.
   401  			//
   402  			// Use a small constant here, as this exists mostly to break ties
   403  			// (e.g. given a type Foo and a field x.Foo, prefer Foo).
   404  			depthFactor = 0.01
   405  		)
   406  
   407  		startWord := true
   408  		exported := true
   409  		depth := 0.0
   410  		for _, r := range sym.Name {
   411  			if startWord && !unicode.IsUpper(r) {
   412  				exported = false
   413  			}
   414  			if r == '.' {
   415  				startWord = true
   416  				depth++
   417  			} else {
   418  				startWord = false
   419  			}
   420  		}
   421  
   422  		// TODO(rfindley): use metadata to determine if the file is in a workspace
   423  		// package, rather than this heuristic.
   424  		inWorkspace := false
   425  		for _, root := range roots {
   426  			if strings.HasPrefix(string(i.uri), root) {
   427  				inWorkspace = true
   428  				break
   429  			}
   430  		}
   431  
   432  		// Apply downranking based on workspace position.
   433  		if !inWorkspace {
   434  			score *= nonWorkspaceFactor
   435  			if !exported {
   436  				score *= nonWorkspaceUnexportedFactor
   437  			}
   438  		}
   439  
   440  		// Apply downranking based on symbol depth.
   441  		if depth > 3 {
   442  			depth = 3
   443  		}
   444  		score *= 1.0 - depth*depthFactor
   445  
   446  		if store.tooLow(score) {
   447  			continue
   448  		}
   449  
   450  		si := symbolInformation{
   451  			score:     score,
   452  			symbol:    strings.Join(symbolParts, ""),
   453  			kind:      sym.Kind,
   454  			uri:       i.uri,
   455  			rng:       sym.Range,
   456  			container: string(i.mp.PkgPath),
   457  		}
   458  		store.store(si)
   459  	}
   460  }
   461  
// symbolStore holds the best-scoring symbols seen so far, at most maxSymbols
// of them. The store method keeps res ordered from best to worst. Slots that
// have not been filled hold the zero symbolInformation, whose score is 0, so
// they sit at the end and mark where the results stop.
type symbolStore struct {
	res [maxSymbols]symbolInformation
}
   465  
   466  // store inserts si into the sorted results, if si has a high enough score.
   467  func (sc *symbolStore) store(si symbolInformation) {
   468  	if sc.tooLow(si.score) {
   469  		return
   470  	}
   471  	insertAt := sort.Search(len(sc.res), func(i int) bool {
   472  		// Sort by score, then symbol length, and finally lexically.
   473  		if sc.res[i].score != si.score {
   474  			return sc.res[i].score < si.score
   475  		}
   476  		if len(sc.res[i].symbol) != len(si.symbol) {
   477  			return len(sc.res[i].symbol) > len(si.symbol)
   478  		}
   479  		return sc.res[i].symbol > si.symbol
   480  	})
   481  	if insertAt < len(sc.res)-1 {
   482  		copy(sc.res[insertAt+1:], sc.res[insertAt:len(sc.res)-1])
   483  	}
   484  	sc.res[insertAt] = si
   485  }
   486  
   487  func (sc *symbolStore) tooLow(score float64) bool {
   488  	return score <= sc.res[len(sc.res)-1].score
   489  }
   490  
   491  func (sc *symbolStore) results() []protocol.SymbolInformation {
   492  	var res []protocol.SymbolInformation
   493  	for _, si := range sc.res {
   494  		if si.score <= 0 {
   495  			return res
   496  		}
   497  		res = append(res, si.asProtocolSymbolInformation())
   498  	}
   499  	return res
   500  }
   501  
// symbolInformation is a cut-down version of protocol.SymbolInformation that
// allows struct values of this type to be used as map keys. It additionally
// carries the match score used to rank results.
type symbolInformation struct {
	score     float64              // final (downranked) match score; 0 in an unfilled symbolStore slot
	symbol    string               // symbol text as produced by the symbolizer
	container string               // package path of the package containing the symbol
	kind      protocol.SymbolKind  // kind of the symbol
	uri       protocol.DocumentURI // file in which the symbol is declared
	rng       protocol.Range       // range of the symbol within that file
}
   512  
   513  // asProtocolSymbolInformation converts s to a protocol.SymbolInformation value.
   514  //
   515  // TODO: work out how to handle tags if/when they are needed.
   516  func (s symbolInformation) asProtocolSymbolInformation() protocol.SymbolInformation {
   517  	return protocol.SymbolInformation{
   518  		Name: s.symbol,
   519  		Kind: s.kind,
   520  		Location: protocol.Location{
   521  			URI:   s.uri,
   522  			Range: s.rng,
   523  		},
   524  		ContainerName: s.container,
   525  	}
   526  }