github.com/jd-ly/tools@v0.5.7/internal/lsp/source/workspace_symbol.go (about)

     1  // Copyright 2020 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package source
     6  
     7  import (
     8  	"context"
     9  	"fmt"
    10  	"go/ast"
    11  	"go/token"
    12  	"go/types"
    13  	"sort"
    14  	"strings"
    15  	"unicode"
    16  	"unicode/utf8"
    17  
    18  	"github.com/jd-ly/tools/internal/event"
    19  	"github.com/jd-ly/tools/internal/lsp/fuzzy"
    20  	"github.com/jd-ly/tools/internal/lsp/protocol"
    21  	"github.com/jd-ly/tools/internal/span"
    22  )
    23  
    24  // maxSymbols defines the maximum number of symbol results that should ever be
    25  // sent in response to a client.
    26  const maxSymbols = 100
    27  
    28  // WorkspaceSymbols matches symbols across all views using the given query,
    29  // according to the match semantics parameterized by matcherType and style.
    30  //
    31  // The workspace symbol method is defined in the spec as follows:
    32  //
    33  //   The workspace symbol request is sent from the client to the server to
    34  //   list project-wide symbols matching the query string.
    35  //
    36  // It is unclear what "project-wide" means here, but given the parameters of
    37  // workspace/symbol do not include any workspace identifier, then it has to be
    38  // assumed that "project-wide" means "across all workspaces".  Hence why
    39  // WorkspaceSymbols receives the views []View.
    40  //
    41  // However, it then becomes unclear what it would mean to call WorkspaceSymbols
    42  // with a different configured SymbolMatcher per View. Therefore we assume that
    43  // Session level configuration will define the SymbolMatcher to be used for the
    44  // WorkspaceSymbols method.
    45  func WorkspaceSymbols(ctx context.Context, matcherType SymbolMatcher, style SymbolStyle, views []View, query string) ([]protocol.SymbolInformation, error) {
    46  	ctx, done := event.Start(ctx, "source.WorkspaceSymbols")
    47  	defer done()
    48  	if query == "" {
    49  		return nil, nil
    50  	}
    51  	sc := newSymbolCollector(matcherType, style, query)
    52  	return sc.walk(ctx, views)
    53  }
    54  
    55  // A matcherFunc determines the matching score of a symbol.
    56  //
    57  // See the comment for symbolCollector for more information.
    58  type matcherFunc func(name string) float64
    59  
    60  // A symbolizer returns the best symbol match for name with pkg, according to
    61  // some heuristic.
    62  //
    63  // See the comment for symbolCollector for more information.
    64  type symbolizer func(name string, pkg Package, m matcherFunc) (string, float64)
    65  
    66  func fullyQualifiedSymbolMatch(name string, pkg Package, matcher matcherFunc) (string, float64) {
    67  	_, score := dynamicSymbolMatch(name, pkg, matcher)
    68  	if score > 0 {
    69  		return pkg.PkgPath() + "." + name, score
    70  	}
    71  	return "", 0
    72  }
    73  
    74  func dynamicSymbolMatch(name string, pkg Package, matcher matcherFunc) (string, float64) {
    75  	// Prefer any package-qualified match.
    76  	pkgQualified := pkg.Name() + "." + name
    77  	if match, score := bestMatch(pkgQualified, matcher); match != "" {
    78  		return match, score
    79  	}
    80  	fullyQualified := pkg.PkgPath() + "." + name
    81  	if match, score := bestMatch(fullyQualified, matcher); match != "" {
    82  		return match, score
    83  	}
    84  	return "", 0
    85  }
    86  
    87  func packageSymbolMatch(name string, pkg Package, matcher matcherFunc) (string, float64) {
    88  	qualified := pkg.Name() + "." + name
    89  	if matcher(qualified) > 0 {
    90  		return qualified, 1
    91  	}
    92  	return "", 0
    93  }
    94  
    95  // bestMatch returns the highest scoring symbol suffix of fullPath, starting
    96  // from the right and splitting on selectors and path components.
    97  //
    98  // e.g. given a symbol path of the form 'host.com/dir/pkg.type.field', we
    99  // check the match quality of the following:
   100  //  - field
   101  //  - type.field
   102  //  - pkg.type.field
   103  //  - dir/pkg.type.field
   104  //  - host.com/dir/pkg.type.field
   105  //
   106  // and return the best match, along with its score.
   107  //
   108  // This is used to implement the 'dynamic' symbol style.
   109  func bestMatch(fullPath string, matcher matcherFunc) (string, float64) {
   110  	pathParts := strings.Split(fullPath, "/")
   111  	dottedParts := strings.Split(pathParts[len(pathParts)-1], ".")
   112  
   113  	var best string
   114  	var score float64
   115  
   116  	for i := 0; i < len(dottedParts); i++ {
   117  		path := strings.Join(dottedParts[len(dottedParts)-1-i:], ".")
   118  		if match := matcher(path); match > score {
   119  			best = path
   120  			score = match
   121  		}
   122  	}
   123  	for i := 0; i < len(pathParts); i++ {
   124  		path := strings.Join(pathParts[len(pathParts)-1-i:], "/")
   125  		if match := matcher(path); match > score {
   126  			best = path
   127  			score = match
   128  		}
   129  	}
   130  	return best, score
   131  }
   132  
   133  // symbolCollector holds context as we walk Packages, gathering symbols that
   134  // match a given query.
   135  //
   136  // How we match symbols is parameterized by two interfaces:
   137  //  * A matcherFunc determines how well a string symbol matches a query. It
   138  //    returns a non-negative score indicating the quality of the match. A score
   139  //    of zero indicates no match.
   140  //  * A symbolizer determines how we extract the symbol for an object. This
   141  //    enables the 'symbolStyle' configuration option.
   142  type symbolCollector struct {
   143  	// These types parameterize the symbol-matching pass.
   144  	matcher    matcherFunc
   145  	symbolizer symbolizer
   146  
   147  	// current holds metadata for the package we are currently walking.
   148  	current *pkgView
   149  	curFile *ParsedGoFile
   150  
   151  	res [maxSymbols]symbolInformation
   152  }
   153  
   154  func newSymbolCollector(matcher SymbolMatcher, style SymbolStyle, query string) *symbolCollector {
   155  	var m matcherFunc
   156  	switch matcher {
   157  	case SymbolFuzzy:
   158  		m = parseQuery(query)
   159  	case SymbolCaseSensitive:
   160  		m = func(s string) float64 {
   161  			if strings.Contains(s, query) {
   162  				return 1
   163  			}
   164  			return 0
   165  		}
   166  	case SymbolCaseInsensitive:
   167  		q := strings.ToLower(query)
   168  		m = func(s string) float64 {
   169  			if strings.Contains(strings.ToLower(s), q) {
   170  				return 1
   171  			}
   172  			return 0
   173  		}
   174  	default:
   175  		panic(fmt.Errorf("unknown symbol matcher: %v", matcher))
   176  	}
   177  	var s symbolizer
   178  	switch style {
   179  	case DynamicSymbols:
   180  		s = dynamicSymbolMatch
   181  	case FullyQualifiedSymbols:
   182  		s = fullyQualifiedSymbolMatch
   183  	case PackageQualifiedSymbols:
   184  		s = packageSymbolMatch
   185  	default:
   186  		panic(fmt.Errorf("unknown symbol style: %v", style))
   187  	}
   188  	return &symbolCollector{
   189  		matcher:    m,
   190  		symbolizer: s,
   191  	}
   192  }
   193  
   194  // parseQuery parses a field-separated symbol query, extracting the special
   195  // characters listed below, and returns a matcherFunc corresponding to the AND
   196  // of all field queries.
   197  //
   198  // Special characters:
   199  //   ^  match exact prefix
   200  //   $  match exact suffix
   201  //   '  match exact
   202  //
   203  // In all three of these special queries, matches are 'smart-cased', meaning
   204  // they are case sensitive if the symbol query contains any upper-case
   205  // characters, and case insensitive otherwise.
   206  func parseQuery(q string) matcherFunc {
   207  	fields := strings.Fields(q)
   208  	if len(fields) == 0 {
   209  		return func(string) float64 { return 0 }
   210  	}
   211  	var funcs []matcherFunc
   212  	for _, field := range fields {
   213  		var f matcherFunc
   214  		switch {
   215  		case strings.HasPrefix(field, "^"):
   216  			prefix := field[1:]
   217  			f = smartCase(prefix, func(s string) float64 {
   218  				if strings.HasPrefix(s, prefix) {
   219  					return 1
   220  				}
   221  				return 0
   222  			})
   223  		case strings.HasPrefix(field, "'"):
   224  			exact := field[1:]
   225  			f = smartCase(exact, func(s string) float64 {
   226  				if strings.Contains(s, exact) {
   227  					return 1
   228  				}
   229  				return 0
   230  			})
   231  		case strings.HasSuffix(field, "$"):
   232  			suffix := field[0 : len(field)-1]
   233  			f = smartCase(suffix, func(s string) float64 {
   234  				if strings.HasSuffix(s, suffix) {
   235  					return 1
   236  				}
   237  				return 0
   238  			})
   239  		default:
   240  			fm := fuzzy.NewMatcher(field)
   241  			f = func(s string) float64 {
   242  				return float64(fm.Score(s))
   243  			}
   244  		}
   245  		funcs = append(funcs, f)
   246  	}
   247  	return comboMatcher(funcs).match
   248  }
   249  
   250  // smartCase returns a matcherFunc that is case-sensitive if q contains any
   251  // upper-case characters, and case-insensitive otherwise.
   252  func smartCase(q string, m matcherFunc) matcherFunc {
   253  	insensitive := strings.ToLower(q) == q
   254  	return func(s string) float64 {
   255  		if insensitive {
   256  			s = strings.ToLower(s)
   257  		}
   258  		return m(s)
   259  	}
   260  }
   261  
   262  type comboMatcher []matcherFunc
   263  
   264  func (c comboMatcher) match(s string) float64 {
   265  	score := 1.0
   266  	for _, f := range c {
   267  		score *= f(s)
   268  	}
   269  	return score
   270  }
   271  
   272  // walk walks views, gathers symbols, and returns the results.
   273  func (sc *symbolCollector) walk(ctx context.Context, views []View) (_ []protocol.SymbolInformation, err error) {
   274  	toWalk, release, err := sc.collectPackages(ctx, views)
   275  	defer release()
   276  	if err != nil {
   277  		return nil, err
   278  	}
   279  	// Make sure we only walk files once (we might see them more than once due to
   280  	// build constraints).
   281  	seen := make(map[span.URI]bool)
   282  	for _, pv := range toWalk {
   283  		sc.current = pv
   284  		for _, pgf := range pv.pkg.CompiledGoFiles() {
   285  			if seen[pgf.URI] {
   286  				continue
   287  			}
   288  			seen[pgf.URI] = true
   289  			sc.curFile = pgf
   290  			sc.walkFilesDecls(pgf.File.Decls)
   291  		}
   292  	}
   293  	return sc.results(), nil
   294  }
   295  
   296  func (sc *symbolCollector) results() []protocol.SymbolInformation {
   297  	var res []protocol.SymbolInformation
   298  	for _, si := range sc.res {
   299  		if si.score <= 0 {
   300  			return res
   301  		}
   302  		res = append(res, si.asProtocolSymbolInformation())
   303  	}
   304  	return res
   305  }
   306  
   307  // collectPackages gathers the packages we are going to inspect for symbols.
   308  // This pre-step is required in order to filter out any "duplicate"
   309  // *types.Package. The duplicates arise for packages that have test variants.
   310  // For example, if package mod.com/p has test files, then we will visit two
   311  // packages that have the PkgPath() mod.com/p: the first is the actual package
   312  // mod.com/p, the second is a special version that includes the non-XTest
   313  // _test.go files. If we were to walk both of of these packages, then we would
   314  // get duplicate matching symbols and we would waste effort. Therefore where
   315  // test variants exist we walk those (because they include any symbols defined
   316  // in non-XTest _test.go files).
   317  //
   318  // One further complication is that even after this filtering, packages between
   319  // views might not be "identical" because they can be built using different
   320  // build constraints (via the "env" config option).
   321  //
   322  // Therefore on a per view basis we first build up a map of package path ->
   323  // *types.Package preferring the test variants if they exist. Then we merge the
   324  // results between views, de-duping by *types.Package.
   325  func (sc *symbolCollector) collectPackages(ctx context.Context, views []View) ([]*pkgView, func(), error) {
   326  	gathered := make(map[string]map[*types.Package]*pkgView)
   327  	var releaseFuncs []func()
   328  	release := func() {
   329  		for _, releaseFunc := range releaseFuncs {
   330  			releaseFunc()
   331  		}
   332  	}
   333  	var toWalk []*pkgView
   334  	for _, v := range views {
   335  		seen := make(map[string]*pkgView)
   336  		snapshot, release := v.Snapshot(ctx)
   337  		releaseFuncs = append(releaseFuncs, release)
   338  		knownPkgs, err := snapshot.KnownPackages(ctx)
   339  		if err != nil {
   340  			return nil, release, err
   341  		}
   342  		workspacePackages, err := snapshot.WorkspacePackages(ctx)
   343  		if err != nil {
   344  			return nil, release, err
   345  		}
   346  		isWorkspacePkg := make(map[Package]bool)
   347  		for _, wp := range workspacePackages {
   348  			isWorkspacePkg[wp] = true
   349  		}
   350  		var forTests []*pkgView
   351  		for _, pkg := range knownPkgs {
   352  			toAdd := &pkgView{
   353  				pkg:         pkg,
   354  				snapshot:    snapshot,
   355  				isWorkspace: isWorkspacePkg[pkg],
   356  			}
   357  			// Defer test packages, so that they overwrite seen for this package
   358  			// path.
   359  			if pkg.ForTest() != "" {
   360  				forTests = append(forTests, toAdd)
   361  			} else {
   362  				seen[pkg.PkgPath()] = toAdd
   363  			}
   364  		}
   365  		for _, pkg := range forTests {
   366  			seen[pkg.pkg.PkgPath()] = pkg
   367  		}
   368  		for _, pkg := range seen {
   369  			pm, ok := gathered[pkg.pkg.PkgPath()]
   370  			if !ok {
   371  				pm = make(map[*types.Package]*pkgView)
   372  				gathered[pkg.pkg.PkgPath()] = pm
   373  			}
   374  			pm[pkg.pkg.GetTypes()] = pkg
   375  		}
   376  	}
   377  	for _, pm := range gathered {
   378  		for _, pkg := range pm {
   379  			toWalk = append(toWalk, pkg)
   380  		}
   381  	}
   382  	// Now sort for stability of results. We order by
   383  	// (pkgView.isWorkspace, pkgView.p.ID())
   384  	sort.Slice(toWalk, func(i, j int) bool {
   385  		lhs := toWalk[i]
   386  		rhs := toWalk[j]
   387  		switch {
   388  		case lhs.isWorkspace == rhs.isWorkspace:
   389  			return lhs.pkg.ID() < rhs.pkg.ID()
   390  		case lhs.isWorkspace:
   391  			return true
   392  		default:
   393  			return false
   394  		}
   395  	})
   396  	return toWalk, release, nil
   397  }
   398  
   399  func (sc *symbolCollector) walkFilesDecls(decls []ast.Decl) {
   400  	for _, decl := range decls {
   401  		switch decl := decl.(type) {
   402  		case *ast.FuncDecl:
   403  			kind := protocol.Function
   404  			var recv *ast.Ident
   405  			if decl.Recv != nil {
   406  				kind = protocol.Method
   407  				switch typ := decl.Recv.List[0].Type.(type) {
   408  				case *ast.StarExpr:
   409  					recv = typ.X.(*ast.Ident)
   410  				case *ast.Ident:
   411  					recv = typ
   412  				}
   413  			}
   414  			if recv != nil {
   415  				sc.match(decl.Name.Name, kind, decl.Name, recv)
   416  			} else {
   417  				sc.match(decl.Name.Name, kind, decl.Name)
   418  			}
   419  		case *ast.GenDecl:
   420  			for _, spec := range decl.Specs {
   421  				switch spec := spec.(type) {
   422  				case *ast.TypeSpec:
   423  					sc.match(spec.Name.Name, typeToKind(sc.current.pkg.GetTypesInfo().TypeOf(spec.Type)), spec.Name)
   424  					sc.walkType(spec.Type, spec.Name)
   425  				case *ast.ValueSpec:
   426  					for _, name := range spec.Names {
   427  						kind := protocol.Variable
   428  						if decl.Tok == token.CONST {
   429  							kind = protocol.Constant
   430  						}
   431  						sc.match(name.Name, kind, name)
   432  					}
   433  				}
   434  			}
   435  		}
   436  	}
   437  }
   438  
   439  // walkType processes symbols related to a type expression. path is path of
   440  // nested type identifiers to the type expression.
   441  func (sc *symbolCollector) walkType(typ ast.Expr, path ...*ast.Ident) {
   442  	switch st := typ.(type) {
   443  	case *ast.StructType:
   444  		for _, field := range st.Fields.List {
   445  			sc.walkField(field, protocol.Field, protocol.Field, path...)
   446  		}
   447  	case *ast.InterfaceType:
   448  		for _, field := range st.Methods.List {
   449  			sc.walkField(field, protocol.Interface, protocol.Method, path...)
   450  		}
   451  	}
   452  }
   453  
   454  // walkField processes symbols related to the struct field or interface method.
   455  //
   456  // unnamedKind and namedKind are the symbol kinds if the field is resp. unnamed
   457  // or named. path is the path of nested identifiers containing the field.
   458  func (sc *symbolCollector) walkField(field *ast.Field, unnamedKind, namedKind protocol.SymbolKind, path ...*ast.Ident) {
   459  	if len(field.Names) == 0 {
   460  		sc.match(types.ExprString(field.Type), unnamedKind, field, path...)
   461  	}
   462  	for _, name := range field.Names {
   463  		sc.match(name.Name, namedKind, name, path...)
   464  		sc.walkType(field.Type, append(path, name)...)
   465  	}
   466  }
   467  
   468  func typeToKind(typ types.Type) protocol.SymbolKind {
   469  	switch typ := typ.Underlying().(type) {
   470  	case *types.Interface:
   471  		return protocol.Interface
   472  	case *types.Struct:
   473  		return protocol.Struct
   474  	case *types.Signature:
   475  		if typ.Recv() != nil {
   476  			return protocol.Method
   477  		}
   478  		return protocol.Function
   479  	case *types.Named:
   480  		return typeToKind(typ.Underlying())
   481  	case *types.Basic:
   482  		i := typ.Info()
   483  		switch {
   484  		case i&types.IsNumeric != 0:
   485  			return protocol.Number
   486  		case i&types.IsBoolean != 0:
   487  			return protocol.Boolean
   488  		case i&types.IsString != 0:
   489  			return protocol.String
   490  		}
   491  	}
   492  	return protocol.Variable
   493  }
   494  
   495  // match finds matches and gathers the symbol identified by name, kind and node
   496  // via the symbolCollector's matcher after first de-duping against previously
   497  // seen symbols.
   498  //
   499  // path specifies the identifier path to a nested field or interface method.
   500  func (sc *symbolCollector) match(name string, kind protocol.SymbolKind, node ast.Node, path ...*ast.Ident) {
   501  	if !node.Pos().IsValid() || !node.End().IsValid() {
   502  		return
   503  	}
   504  
   505  	isExported := isExported(name)
   506  	if len(path) > 0 {
   507  		var nameBuilder strings.Builder
   508  		for _, ident := range path {
   509  			nameBuilder.WriteString(ident.Name)
   510  			nameBuilder.WriteString(".")
   511  			if !ident.IsExported() {
   512  				isExported = false
   513  			}
   514  		}
   515  		nameBuilder.WriteString(name)
   516  		name = nameBuilder.String()
   517  	}
   518  
   519  	// Factors to apply to the match score for the purpose of downranking
   520  	// results.
   521  	//
   522  	// These numbers were crudely calibrated based on trial-and-error using a
   523  	// small number of sample queries. Adjust as necessary.
   524  	//
   525  	// All factors are multiplicative, meaning if more than one applies they are
   526  	// multiplied together.
   527  	const (
   528  		// nonWorkspaceFactor is applied to symbols outside of any active
   529  		// workspace. Developers are less likely to want to jump to code that they
   530  		// are not actively working on.
   531  		nonWorkspaceFactor = 0.5
   532  		// nonWorkspaceUnexportedFactor is applied to unexported symbols outside of
   533  		// any active workspace. Since one wouldn't usually jump to unexported
   534  		// symbols to understand a package API, they are particularly irrelevant.
   535  		nonWorkspaceUnexportedFactor = 0.5
   536  		// fieldFactor is applied to fields and interface methods. One would
   537  		// typically jump to the type definition first, so ranking fields highly
   538  		// can be noisy.
   539  		fieldFactor = 0.5
   540  	)
   541  	symbol, score := sc.symbolizer(name, sc.current.pkg, sc.matcher)
   542  
   543  	// Downrank symbols outside of the workspace.
   544  	if !sc.current.isWorkspace {
   545  		score *= nonWorkspaceFactor
   546  		if !isExported {
   547  			score *= nonWorkspaceUnexportedFactor
   548  		}
   549  	}
   550  
   551  	// Downrank fields.
   552  	if len(path) > 0 {
   553  		score *= fieldFactor
   554  	}
   555  
   556  	// Avoid the work below if we know this score will not be sorted into the
   557  	// results.
   558  	if score <= sc.res[len(sc.res)-1].score {
   559  		return
   560  	}
   561  
   562  	mrng := NewMappedRange(sc.current.snapshot.FileSet(), sc.curFile.Mapper, node.Pos(), node.End())
   563  	rng, err := mrng.Range()
   564  	if err != nil {
   565  		return
   566  	}
   567  	si := symbolInformation{
   568  		score:     score,
   569  		name:      name,
   570  		symbol:    symbol,
   571  		container: sc.current.pkg.PkgPath(),
   572  		kind:      kind,
   573  		location: protocol.Location{
   574  			URI:   protocol.URIFromSpanURI(mrng.URI()),
   575  			Range: rng,
   576  		},
   577  	}
   578  	insertAt := sort.Search(len(sc.res), func(i int) bool {
   579  		return sc.res[i].score < score
   580  	})
   581  	if insertAt < len(sc.res)-1 {
   582  		copy(sc.res[insertAt+1:], sc.res[insertAt:len(sc.res)-1])
   583  	}
   584  	sc.res[insertAt] = si
   585  }
   586  
   587  // isExported reports if a token is exported. Copied from
   588  // token.IsExported (go1.13+).
   589  //
   590  // TODO: replace usage with token.IsExported once go1.12 is no longer
   591  // supported.
   592  func isExported(name string) bool {
   593  	ch, _ := utf8.DecodeRuneInString(name)
   594  	return unicode.IsUpper(ch)
   595  }
   596  
   597  // pkgView holds information related to a package that we are going to walk.
   598  type pkgView struct {
   599  	pkg         Package
   600  	snapshot    Snapshot
   601  	isWorkspace bool
   602  }
   603  
   604  // symbolInformation is a cut-down version of protocol.SymbolInformation that
   605  // allows struct values of this type to be used as map keys.
   606  type symbolInformation struct {
   607  	score     float64
   608  	name      string
   609  	symbol    string
   610  	container string
   611  	kind      protocol.SymbolKind
   612  	location  protocol.Location
   613  }
   614  
   615  // asProtocolSymbolInformation converts s to a protocol.SymbolInformation value.
   616  //
   617  // TODO: work out how to handle tags if/when they are needed.
   618  func (s symbolInformation) asProtocolSymbolInformation() protocol.SymbolInformation {
   619  	return protocol.SymbolInformation{
   620  		Name:          s.symbol,
   621  		Kind:          s.kind,
   622  		Location:      s.location,
   623  		ContainerName: s.container,
   624  	}
   625  }