github.com/powerman/golang-tools@v0.1.11-0.20220410185822-5ad214d8d803/internal/lsp/source/workspace_symbol.go (about)

     1  // Copyright 2020 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package source
     6  
     7  import (
     8  	"context"
     9  	"fmt"
    10  	"go/types"
    11  	"path/filepath"
    12  	"runtime"
    13  	"sort"
    14  	"strings"
    15  	"unicode"
    16  
    17  	"github.com/powerman/golang-tools/internal/event"
    18  	"github.com/powerman/golang-tools/internal/lsp/fuzzy"
    19  	"github.com/powerman/golang-tools/internal/lsp/protocol"
    20  	"github.com/powerman/golang-tools/internal/span"
    21  )
    22  
// Symbol holds a precomputed symbol value. Note: we avoid using the
// protocol.SymbolInformation struct here in order to reduce the size of each
// symbol.
type Symbol struct {
	// Name is the text handed to the symbolizer when matching a query. Each
	// '.' in it is counted as one nesting level when results are ranked.
	Name  string
	// Kind is copied unchanged into the reported result.
	Kind  protocol.SymbolKind
	// Range is copied unchanged into the location of the reported result.
	Range protocol.Range
}
    31  
// maxSymbols defines the maximum number of symbol results that should ever be
// sent in response to a client. It is also the fixed capacity of every
// symbolStore, so each worker keeps at most this many candidates.
const maxSymbols = 100
    35  
    36  // WorkspaceSymbols matches symbols across all views using the given query,
    37  // according to the match semantics parameterized by matcherType and style.
    38  //
    39  // The workspace symbol method is defined in the spec as follows:
    40  //
    41  //   The workspace symbol request is sent from the client to the server to
    42  //   list project-wide symbols matching the query string.
    43  //
    44  // It is unclear what "project-wide" means here, but given the parameters of
    45  // workspace/symbol do not include any workspace identifier, then it has to be
    46  // assumed that "project-wide" means "across all workspaces".  Hence why
    47  // WorkspaceSymbols receives the views []View.
    48  //
    49  // However, it then becomes unclear what it would mean to call WorkspaceSymbols
    50  // with a different configured SymbolMatcher per View. Therefore we assume that
    51  // Session level configuration will define the SymbolMatcher to be used for the
    52  // WorkspaceSymbols method.
    53  func WorkspaceSymbols(ctx context.Context, matcherType SymbolMatcher, style SymbolStyle, views []View, query string) ([]protocol.SymbolInformation, error) {
    54  	ctx, done := event.Start(ctx, "source.WorkspaceSymbols")
    55  	defer done()
    56  	if query == "" {
    57  		return nil, nil
    58  	}
    59  	sc := newSymbolCollector(matcherType, style, query)
    60  	return sc.walk(ctx, views)
    61  }
    62  
// A matcherFunc returns the index and score of a symbol match.
//
// The candidate is passed as chunks, which the matchers in this file treat as
// one string formed by concatenating the chunks in order. The returned index
// is a byte offset into that concatenation; the matchers defined here return
// index -1 together with score 0 when the candidate does not match.
//
// See the comment for symbolCollector for more information.
type matcherFunc func(chunks []string) (int, float64)
    67  
// A symbolizer returns the best symbol match for name within pkg, according
// to some heuristic, using m to score candidate spellings of the symbol.
//
// The result is the chosen spelling split into logical pieces, for example
// []string{"pkg", ".", "myType.field"} or just []string{"myType.field"};
// callers join the pieces to obtain the displayed symbol. The float64 is the
// score of that spelling. A score of zero means there was no match.
//
// See the comment for symbolCollector for more information.
type symbolizer func(name string, pkg Metadata, m matcherFunc) ([]string, float64)
    75  
    76  func fullyQualifiedSymbolMatch(name string, pkg Metadata, matcher matcherFunc) ([]string, float64) {
    77  	_, score := dynamicSymbolMatch(name, pkg, matcher)
    78  	if score > 0 {
    79  		return []string{pkg.PackagePath(), ".", name}, score
    80  	}
    81  	return nil, 0
    82  }
    83  
    84  func dynamicSymbolMatch(name string, pkg Metadata, matcher matcherFunc) ([]string, float64) {
    85  	var score float64
    86  
    87  	endsInPkgName := strings.HasSuffix(pkg.PackagePath(), pkg.PackageName())
    88  
    89  	// If the package path does not end in the package name, we need to check the
    90  	// package-qualified symbol as an extra pass first.
    91  	if !endsInPkgName {
    92  		pkgQualified := []string{pkg.PackageName(), ".", name}
    93  		idx, score := matcher(pkgQualified)
    94  		nameStart := len(pkg.PackageName()) + 1
    95  		if score > 0 {
    96  			// If our match is contained entirely within the unqualified portion,
    97  			// just return that.
    98  			if idx >= nameStart {
    99  				return []string{name}, score
   100  			}
   101  			// Lower the score for matches that include the package name.
   102  			return pkgQualified, score * 0.8
   103  		}
   104  	}
   105  
   106  	// Now try matching the fully qualified symbol.
   107  	fullyQualified := []string{pkg.PackagePath(), ".", name}
   108  	idx, score := matcher(fullyQualified)
   109  
   110  	// As above, check if we matched just the unqualified symbol name.
   111  	nameStart := len(pkg.PackagePath()) + 1
   112  	if idx >= nameStart {
   113  		return []string{name}, score
   114  	}
   115  
   116  	// If our package path ends in the package name, we'll have skipped the
   117  	// initial pass above, so check if we matched just the package-qualified
   118  	// name.
   119  	if endsInPkgName && idx >= 0 {
   120  		pkgStart := len(pkg.PackagePath()) - len(pkg.PackageName())
   121  		if idx >= pkgStart {
   122  			return []string{pkg.PackageName(), ".", name}, score
   123  		}
   124  	}
   125  
   126  	// Our match was not contained within the unqualified or package qualified
   127  	// symbol. Return the fully qualified symbol but discount the score.
   128  	return fullyQualified, score * 0.6
   129  }
   130  
   131  func packageSymbolMatch(name string, pkg Metadata, matcher matcherFunc) ([]string, float64) {
   132  	qualified := []string{pkg.PackageName(), ".", name}
   133  	if _, s := matcher(qualified); s > 0 {
   134  		return qualified, s
   135  	}
   136  	return nil, 0
   137  }
   138  
// symbolCollector holds context as we walk Packages, gathering symbols that
// match a given query.
//
// How we match symbols is parameterized by two interfaces:
//  * A matcherFunc determines how well a string symbol matches a query. It
//    returns a non-negative score indicating the quality of the match. A score
//    of zero indicates no match.
//  * A symbolizer determines how we extract the symbol for an object. This
//    enables the 'symbolStyle' configuration option.
type symbolCollector struct {
	// These types parameterize the symbol-matching pass.
	//
	// matchers holds one matcherFunc per concurrent worker started by walk;
	// each worker uses only its own entry.
	matchers   []matcherFunc
	symbolizer symbolizer

	// The embedded symbolStore receives the merged results of all workers
	// and is what walk finally converts into the response.
	symbolStore
}
   155  
   156  func newSymbolCollector(matcher SymbolMatcher, style SymbolStyle, query string) *symbolCollector {
   157  	var s symbolizer
   158  	switch style {
   159  	case DynamicSymbols:
   160  		s = dynamicSymbolMatch
   161  	case FullyQualifiedSymbols:
   162  		s = fullyQualifiedSymbolMatch
   163  	case PackageQualifiedSymbols:
   164  		s = packageSymbolMatch
   165  	default:
   166  		panic(fmt.Errorf("unknown symbol style: %v", style))
   167  	}
   168  	sc := &symbolCollector{symbolizer: s}
   169  	sc.matchers = make([]matcherFunc, runtime.GOMAXPROCS(-1))
   170  	for i := range sc.matchers {
   171  		sc.matchers[i] = buildMatcher(matcher, query)
   172  	}
   173  	return sc
   174  }
   175  
   176  func buildMatcher(matcher SymbolMatcher, query string) matcherFunc {
   177  	switch matcher {
   178  	case SymbolFuzzy:
   179  		return parseQuery(query, newFuzzyMatcher)
   180  	case SymbolFastFuzzy:
   181  		return parseQuery(query, func(query string) matcherFunc {
   182  			return fuzzy.NewSymbolMatcher(query).Match
   183  		})
   184  	case SymbolCaseSensitive:
   185  		return matchExact(query)
   186  	case SymbolCaseInsensitive:
   187  		q := strings.ToLower(query)
   188  		exact := matchExact(q)
   189  		wrapper := []string{""}
   190  		return func(chunks []string) (int, float64) {
   191  			s := strings.Join(chunks, "")
   192  			wrapper[0] = strings.ToLower(s)
   193  			return exact(wrapper)
   194  		}
   195  	}
   196  	panic(fmt.Errorf("unknown symbol matcher: %v", matcher))
   197  }
   198  
   199  func newFuzzyMatcher(query string) matcherFunc {
   200  	fm := fuzzy.NewMatcher(query)
   201  	return func(chunks []string) (int, float64) {
   202  		score := float64(fm.ScoreChunks(chunks))
   203  		ranges := fm.MatchedRanges()
   204  		if len(ranges) > 0 {
   205  			return ranges[0], score
   206  		}
   207  		return -1, score
   208  	}
   209  }
   210  
   211  // parseQuery parses a field-separated symbol query, extracting the special
   212  // characters listed below, and returns a matcherFunc corresponding to the AND
   213  // of all field queries.
   214  //
   215  // Special characters:
   216  //   ^  match exact prefix
   217  //   $  match exact suffix
   218  //   '  match exact
   219  //
   220  // In all three of these special queries, matches are 'smart-cased', meaning
   221  // they are case sensitive if the symbol query contains any upper-case
   222  // characters, and case insensitive otherwise.
   223  func parseQuery(q string, newMatcher func(string) matcherFunc) matcherFunc {
   224  	fields := strings.Fields(q)
   225  	if len(fields) == 0 {
   226  		return func([]string) (int, float64) { return -1, 0 }
   227  	}
   228  	var funcs []matcherFunc
   229  	for _, field := range fields {
   230  		var f matcherFunc
   231  		switch {
   232  		case strings.HasPrefix(field, "^"):
   233  			prefix := field[1:]
   234  			f = smartCase(prefix, func(chunks []string) (int, float64) {
   235  				s := strings.Join(chunks, "")
   236  				if strings.HasPrefix(s, prefix) {
   237  					return 0, 1
   238  				}
   239  				return -1, 0
   240  			})
   241  		case strings.HasPrefix(field, "'"):
   242  			exact := field[1:]
   243  			f = smartCase(exact, matchExact(exact))
   244  		case strings.HasSuffix(field, "$"):
   245  			suffix := field[0 : len(field)-1]
   246  			f = smartCase(suffix, func(chunks []string) (int, float64) {
   247  				s := strings.Join(chunks, "")
   248  				if strings.HasSuffix(s, suffix) {
   249  					return len(s) - len(suffix), 1
   250  				}
   251  				return -1, 0
   252  			})
   253  		default:
   254  			f = newMatcher(field)
   255  		}
   256  		funcs = append(funcs, f)
   257  	}
   258  	if len(funcs) == 1 {
   259  		return funcs[0]
   260  	}
   261  	return comboMatcher(funcs).match
   262  }
   263  
   264  func matchExact(exact string) matcherFunc {
   265  	return func(chunks []string) (int, float64) {
   266  		s := strings.Join(chunks, "")
   267  		if idx := strings.LastIndex(s, exact); idx >= 0 {
   268  			return idx, 1
   269  		}
   270  		return -1, 0
   271  	}
   272  }
   273  
   274  // smartCase returns a matcherFunc that is case-sensitive if q contains any
   275  // upper-case characters, and case-insensitive otherwise.
   276  func smartCase(q string, m matcherFunc) matcherFunc {
   277  	insensitive := strings.ToLower(q) == q
   278  	wrapper := []string{""}
   279  	return func(chunks []string) (int, float64) {
   280  		s := strings.Join(chunks, "")
   281  		if insensitive {
   282  			s = strings.ToLower(s)
   283  		}
   284  		wrapper[0] = s
   285  		return m(wrapper)
   286  	}
   287  }
   288  
   289  type comboMatcher []matcherFunc
   290  
   291  func (c comboMatcher) match(chunks []string) (int, float64) {
   292  	score := 1.0
   293  	first := 0
   294  	for _, f := range c {
   295  		idx, s := f(chunks)
   296  		if idx < first {
   297  			first = idx
   298  		}
   299  		score *= s
   300  	}
   301  	return first, score
   302  }
   303  
// walk gathers the symbols of every file known to the given views, scores
// them against the collector's query concurrently, and returns the best
// results converted to protocol form.
//
// It proceeds in three phases: collect one symbolFile per URI across all
// views (skipping files rejected by a view's directory filters), fan the
// files out over one goroutine per matcher, and merge the per-goroutine
// stores into the collector's own store.
//
// An error is returned only if a snapshot fails to produce its symbols.
// Files whose metadata cannot be loaded are reported via event.Error and
// skipped.
func (sc *symbolCollector) walk(ctx context.Context, views []View) ([]protocol.SymbolInformation, error) {
	// Use the root view URIs for determining (lexically) whether a uri is in any
	// open workspace.
	var roots []string
	for _, v := range views {
		roots = append(roots, strings.TrimRight(string(v.Folder()), "/"))
	}

	// results is unbuffered: each worker blocks on its send until the merge
	// loop at the bottom receives from it.
	results := make(chan *symbolStore)
	matcherlen := len(sc.matchers)
	files := make(map[span.URI]symbolFile)

	for _, v := range views {
		snapshot, release := v.Snapshot(ctx)
		// Deferred, not called per iteration: every snapshot is released
		// only when walk returns.
		defer release()
		psyms, err := snapshot.Symbols(ctx)
		if err != nil {
			return nil, err
		}

		filters := v.Options().DirectoryFilters
		folder := filepath.ToSlash(v.Folder().Filename())
		for uri, syms := range psyms {
			// Filters are applied to the slash-separated file name with the
			// view folder stripped from its front, when it has that prefix.
			norm := filepath.ToSlash(uri.Filename())
			nm := strings.TrimPrefix(norm, folder)
			if FiltersDisallow(nm, filters) {
				continue
			}
			// Only scan each file once.
			if _, ok := files[uri]; ok {
				continue
			}
			mds, err := snapshot.MetadataForFile(ctx, uri)
			if err != nil {
				event.Error(ctx, fmt.Sprintf("missing metadata for %q", uri), err)
				continue
			}
			if len(mds) == 0 {
				// TODO: should use the bug reporting API
				continue
			}
			// Only the first metadata entry for the file is used.
			files[uri] = symbolFile{uri, mds[0], syms}
		}
	}

	// Flatten the map into a slice so that workers can index into it.
	var work []symbolFile
	for _, f := range files {
		work = append(work, f)
	}

	// Compute matches concurrently. Each symbolWorker has its own symbolStore,
	// which we merge at the end.
	for i, matcher := range sc.matchers {
		go func(i int, matcher matcherFunc) {
			w := &symbolWorker{
				symbolizer: sc.symbolizer,
				matcher:    matcher,
				ss:         &symbolStore{},
				roots:      roots,
			}
			// Worker i takes files i, i+matcherlen, i+2*matcherlen, ... so
			// no file is handled by two workers.
			for j := i; j < len(work); j += matcherlen {
				w.matchFile(work[j])
			}
			results <- w.ss
		}(i, matcher)
	}

	// Receive exactly one store per worker and fold its entries into the
	// collector's store, which keeps only the best maxSymbols overall.
	for i := 0; i < matcherlen; i++ {
		ss := <-results
		for _, si := range ss.res {
			sc.store(si)
		}
	}
	return sc.results(), nil
}
   379  
   380  // FilterDisallow is code from the body of cache.pathExcludedByFilter in cache/view.go
   381  // Exporting and using that function would cause an import cycle.
   382  // Moving it here and exporting it would leave behind view_test.go.
   383  // (This code is exported and used in the body of cache.pathExcludedByFilter)
   384  func FiltersDisallow(path string, filters []string) bool {
   385  	path = strings.TrimPrefix(path, "/")
   386  	var excluded bool
   387  	for _, filter := range filters {
   388  		op, prefix := filter[0], filter[1:]
   389  		// Non-empty prefixes have to be precise directory matches.
   390  		if prefix != "" {
   391  			prefix = prefix + "/"
   392  			path = path + "/"
   393  		}
   394  		if !strings.HasPrefix(path, prefix) {
   395  			continue
   396  		}
   397  		excluded = op == '-'
   398  	}
   399  	return excluded
   400  }
   401  
// symbolFile holds symbol information for a single file.
type symbolFile struct {
	// uri identifies the file; it is reported as the location of every
	// symbol matched in it.
	uri  span.URI
	// md is the package metadata used to qualify the file's symbols.
	md   Metadata
	// syms are the precomputed symbols of the file.
	syms []Symbol
}
   408  
// symbolWorker matches symbols and captures the highest scoring results.
type symbolWorker struct {
	// symbolizer and matcher together score each symbol against the query.
	symbolizer symbolizer
	matcher    matcherFunc
	// ss collects this worker's best results.
	ss         *symbolStore
	// roots are the workspace folder URIs (as strings) used to decide
	// whether a file belongs to an open workspace.
	roots      []string
}
   416  
   417  func (w *symbolWorker) matchFile(i symbolFile) {
   418  	for _, sym := range i.syms {
   419  		symbolParts, score := w.symbolizer(sym.Name, i.md, w.matcher)
   420  
   421  		// Check if the score is too low before applying any downranking.
   422  		if w.ss.tooLow(score) {
   423  			continue
   424  		}
   425  
   426  		// Factors to apply to the match score for the purpose of downranking
   427  		// results.
   428  		//
   429  		// These numbers were crudely calibrated based on trial-and-error using a
   430  		// small number of sample queries. Adjust as necessary.
   431  		//
   432  		// All factors are multiplicative, meaning if more than one applies they are
   433  		// multiplied together.
   434  		const (
   435  			// nonWorkspaceFactor is applied to symbols outside of any active
   436  			// workspace. Developers are less likely to want to jump to code that they
   437  			// are not actively working on.
   438  			nonWorkspaceFactor = 0.5
   439  			// nonWorkspaceUnexportedFactor is applied to unexported symbols outside of
   440  			// any active workspace. Since one wouldn't usually jump to unexported
   441  			// symbols to understand a package API, they are particularly irrelevant.
   442  			nonWorkspaceUnexportedFactor = 0.5
   443  			// every field or method nesting level to access the field decreases
   444  			// the score by a factor of 1.0 - depth*depthFactor, up to a depth of
   445  			// 3.
   446  			depthFactor = 0.2
   447  		)
   448  
   449  		startWord := true
   450  		exported := true
   451  		depth := 0.0
   452  		for _, r := range sym.Name {
   453  			if startWord && !unicode.IsUpper(r) {
   454  				exported = false
   455  			}
   456  			if r == '.' {
   457  				startWord = true
   458  				depth++
   459  			} else {
   460  				startWord = false
   461  			}
   462  		}
   463  
   464  		inWorkspace := false
   465  		for _, root := range w.roots {
   466  			if strings.HasPrefix(string(i.uri), root) {
   467  				inWorkspace = true
   468  				break
   469  			}
   470  		}
   471  
   472  		// Apply downranking based on workspace position.
   473  		if !inWorkspace {
   474  			score *= nonWorkspaceFactor
   475  			if !exported {
   476  				score *= nonWorkspaceUnexportedFactor
   477  			}
   478  		}
   479  
   480  		// Apply downranking based on symbol depth.
   481  		if depth > 3 {
   482  			depth = 3
   483  		}
   484  		score *= 1.0 - depth*depthFactor
   485  
   486  		if w.ss.tooLow(score) {
   487  			continue
   488  		}
   489  
   490  		si := symbolInformation{
   491  			score:     score,
   492  			symbol:    strings.Join(symbolParts, ""),
   493  			kind:      sym.Kind,
   494  			uri:       i.uri,
   495  			rng:       sym.Range,
   496  			container: i.md.PackagePath(),
   497  		}
   498  		w.ss.store(si)
   499  	}
   500  }
   501  
// symbolStore keeps the best symbol matches seen so far in a fixed-size
// array, ordered best first. Unused slots hold the zero value, whose score
// is 0.
type symbolStore struct {
	res [maxSymbols]symbolInformation
}
   505  
   506  // store inserts si into the sorted results, if si has a high enough score.
   507  func (sc *symbolStore) store(si symbolInformation) {
   508  	if sc.tooLow(si.score) {
   509  		return
   510  	}
   511  	insertAt := sort.Search(len(sc.res), func(i int) bool {
   512  		// Sort by score, then symbol length, and finally lexically.
   513  		if sc.res[i].score != si.score {
   514  			return sc.res[i].score < si.score
   515  		}
   516  		if len(sc.res[i].symbol) != len(si.symbol) {
   517  			return len(sc.res[i].symbol) > len(si.symbol)
   518  		}
   519  		return sc.res[i].symbol > si.symbol
   520  	})
   521  	if insertAt < len(sc.res)-1 {
   522  		copy(sc.res[insertAt+1:], sc.res[insertAt:len(sc.res)-1])
   523  	}
   524  	sc.res[insertAt] = si
   525  }
   526  
   527  func (sc *symbolStore) tooLow(score float64) bool {
   528  	return score <= sc.res[len(sc.res)-1].score
   529  }
   530  
   531  func (sc *symbolStore) results() []protocol.SymbolInformation {
   532  	var res []protocol.SymbolInformation
   533  	for _, si := range sc.res {
   534  		if si.score <= 0 {
   535  			return res
   536  		}
   537  		res = append(res, si.asProtocolSymbolInformation())
   538  	}
   539  	return res
   540  }
   541  
   542  func typeToKind(typ types.Type) protocol.SymbolKind {
   543  	switch typ := typ.Underlying().(type) {
   544  	case *types.Interface:
   545  		return protocol.Interface
   546  	case *types.Struct:
   547  		return protocol.Struct
   548  	case *types.Signature:
   549  		if typ.Recv() != nil {
   550  			return protocol.Method
   551  		}
   552  		return protocol.Function
   553  	case *types.Named:
   554  		return typeToKind(typ.Underlying())
   555  	case *types.Basic:
   556  		i := typ.Info()
   557  		switch {
   558  		case i&types.IsNumeric != 0:
   559  			return protocol.Number
   560  		case i&types.IsBoolean != 0:
   561  			return protocol.Boolean
   562  		case i&types.IsString != 0:
   563  			return protocol.String
   564  		}
   565  	}
   566  	return protocol.Variable
   567  }
   568  
// symbolInformation is a cut-down version of protocol.SymbolInformation that
// allows struct values of this type to be used as map keys.
type symbolInformation struct {
	// score is the ranking score after downranking; the zero value marks an
	// unused slot in a symbolStore.
	score     float64
	// symbol is the displayed name, i.e. the joined pieces chosen by the
	// symbolizer.
	symbol    string
	// container is the package path of the symbol's package.
	container string
	kind      protocol.SymbolKind
	// uri and rng locate the symbol.
	uri       span.URI
	rng       protocol.Range
}
   579  
   580  // asProtocolSymbolInformation converts s to a protocol.SymbolInformation value.
   581  //
   582  // TODO: work out how to handle tags if/when they are needed.
   583  func (s symbolInformation) asProtocolSymbolInformation() protocol.SymbolInformation {
   584  	return protocol.SymbolInformation{
   585  		Name: s.symbol,
   586  		Kind: s.kind,
   587  		Location: protocol.Location{
   588  			URI:   protocol.URIFromSpanURI(s.uri),
   589  			Range: s.rng,
   590  		},
   591  		ContainerName: s.container,
   592  	}
   593  }