github.com/v2fly/tools@v0.100.0/internal/lsp/source/workspace_symbol.go (about)

     1  // Copyright 2020 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package source
     6  
     7  import (
     8  	"context"
     9  	"fmt"
    10  	"go/ast"
    11  	"go/token"
    12  	"go/types"
    13  	"sort"
    14  	"strings"
    15  	"unicode"
    16  	"unicode/utf8"
    17  
    18  	"github.com/v2fly/tools/internal/event"
    19  	"github.com/v2fly/tools/internal/lsp/fuzzy"
    20  	"github.com/v2fly/tools/internal/lsp/protocol"
    21  	"github.com/v2fly/tools/internal/span"
    22  )
    23  
// maxSymbols defines the maximum number of symbol results that should ever be
// sent in response to a client. It is also the fixed length of the
// symbolCollector.res array in which results are accumulated.
const maxSymbols = 100
    27  
    28  // WorkspaceSymbols matches symbols across all views using the given query,
    29  // according to the match semantics parameterized by matcherType and style.
    30  //
    31  // The workspace symbol method is defined in the spec as follows:
    32  //
    33  //   The workspace symbol request is sent from the client to the server to
    34  //   list project-wide symbols matching the query string.
    35  //
    36  // It is unclear what "project-wide" means here, but given the parameters of
    37  // workspace/symbol do not include any workspace identifier, then it has to be
    38  // assumed that "project-wide" means "across all workspaces".  Hence why
    39  // WorkspaceSymbols receives the views []View.
    40  //
    41  // However, it then becomes unclear what it would mean to call WorkspaceSymbols
    42  // with a different configured SymbolMatcher per View. Therefore we assume that
    43  // Session level configuration will define the SymbolMatcher to be used for the
    44  // WorkspaceSymbols method.
    45  func WorkspaceSymbols(ctx context.Context, matcherType SymbolMatcher, style SymbolStyle, views []View, query string) ([]protocol.SymbolInformation, error) {
    46  	ctx, done := event.Start(ctx, "source.WorkspaceSymbols")
    47  	defer done()
    48  	if query == "" {
    49  		return nil, nil
    50  	}
    51  	sc := newSymbolCollector(matcherType, style, query)
    52  	return sc.walk(ctx, views)
    53  }
    54  
// A matcherFunc determines the matching score of a symbol.
//
// It receives a candidate symbol name and returns its score; a score of zero
// means the candidate does not match.
//
// See the comment for symbolCollector for more information.
type matcherFunc func(name string) float64
    59  
// A symbolizer returns the best symbol match for name with pkg, according to
// some heuristic.
//
// m is the matcherFunc used to score candidate spellings of name. The results
// are the chosen symbol string and its score; the symbolizers defined in this
// file return ("", 0) when nothing matches.
//
// See the comment for symbolCollector for more information.
type symbolizer func(name string, pkg Package, m matcherFunc) (string, float64)
    65  
    66  func fullyQualifiedSymbolMatch(name string, pkg Package, matcher matcherFunc) (string, float64) {
    67  	_, score := dynamicSymbolMatch(name, pkg, matcher)
    68  	if score > 0 {
    69  		return pkg.PkgPath() + "." + name, score
    70  	}
    71  	return "", 0
    72  }
    73  
    74  func dynamicSymbolMatch(name string, pkg Package, matcher matcherFunc) (string, float64) {
    75  	// Prefer any package-qualified match.
    76  	pkgQualified := pkg.Name() + "." + name
    77  	if match, score := bestMatch(pkgQualified, matcher); match != "" {
    78  		return match, score
    79  	}
    80  	fullyQualified := pkg.PkgPath() + "." + name
    81  	if match, score := bestMatch(fullyQualified, matcher); match != "" {
    82  		return match, score
    83  	}
    84  	return "", 0
    85  }
    86  
    87  func packageSymbolMatch(name string, pkg Package, matcher matcherFunc) (string, float64) {
    88  	qualified := pkg.Name() + "." + name
    89  	if matcher(qualified) > 0 {
    90  		return qualified, 1
    91  	}
    92  	return "", 0
    93  }
    94  
    95  // bestMatch returns the highest scoring symbol suffix of fullPath, starting
    96  // from the right and splitting on selectors and path components.
    97  //
    98  // e.g. given a symbol path of the form 'host.com/dir/pkg.type.field', we
    99  // check the match quality of the following:
   100  //  - field
   101  //  - type.field
   102  //  - pkg.type.field
   103  //  - dir/pkg.type.field
   104  //  - host.com/dir/pkg.type.field
   105  //
   106  // and return the best match, along with its score.
   107  //
   108  // This is used to implement the 'dynamic' symbol style.
   109  func bestMatch(fullPath string, matcher matcherFunc) (string, float64) {
   110  	pathParts := strings.Split(fullPath, "/")
   111  	dottedParts := strings.Split(pathParts[len(pathParts)-1], ".")
   112  
   113  	var best string
   114  	var score float64
   115  
   116  	for i := 0; i < len(dottedParts); i++ {
   117  		path := strings.Join(dottedParts[len(dottedParts)-1-i:], ".")
   118  		if match := matcher(path); match > score {
   119  			best = path
   120  			score = match
   121  		}
   122  	}
   123  	for i := 0; i < len(pathParts); i++ {
   124  		path := strings.Join(pathParts[len(pathParts)-1-i:], "/")
   125  		if match := matcher(path); match > score {
   126  			best = path
   127  			score = match
   128  		}
   129  	}
   130  	return best, score
   131  }
   132  
// symbolCollector holds context as we walk Packages, gathering symbols that
// match a given query.
//
// How we match symbols is parameterized by two interfaces:
//  * A matcherFunc determines how well a string symbol matches a query. It
//    returns a non-negative score indicating the quality of the match. A score
//    of zero indicates no match.
//  * A symbolizer determines how we extract the symbol for an object. This
//    enables the 'symbolStyle' configuration option.
type symbolCollector struct {
	// These types parameterize the symbol-matching pass.
	matcher    matcherFunc
	symbolizer symbolizer

	// current holds metadata for the package we are currently walking.
	current *pkgView
	// curFile is the file currently being walked; match uses it to compute
	// the location of each symbol.
	curFile *ParsedGoFile

	// res holds the best matches found so far, kept in descending score
	// order by match. Slots that have not been filled keep a zero score.
	res [maxSymbols]symbolInformation
}
   153  
   154  func newSymbolCollector(matcher SymbolMatcher, style SymbolStyle, query string) *symbolCollector {
   155  	var m matcherFunc
   156  	switch matcher {
   157  	case SymbolFuzzy:
   158  		m = parseQuery(query)
   159  	case SymbolCaseSensitive:
   160  		m = func(s string) float64 {
   161  			if strings.Contains(s, query) {
   162  				return 1
   163  			}
   164  			return 0
   165  		}
   166  	case SymbolCaseInsensitive:
   167  		q := strings.ToLower(query)
   168  		m = func(s string) float64 {
   169  			if strings.Contains(strings.ToLower(s), q) {
   170  				return 1
   171  			}
   172  			return 0
   173  		}
   174  	default:
   175  		panic(fmt.Errorf("unknown symbol matcher: %v", matcher))
   176  	}
   177  	var s symbolizer
   178  	switch style {
   179  	case DynamicSymbols:
   180  		s = dynamicSymbolMatch
   181  	case FullyQualifiedSymbols:
   182  		s = fullyQualifiedSymbolMatch
   183  	case PackageQualifiedSymbols:
   184  		s = packageSymbolMatch
   185  	default:
   186  		panic(fmt.Errorf("unknown symbol style: %v", style))
   187  	}
   188  	return &symbolCollector{
   189  		matcher:    m,
   190  		symbolizer: s,
   191  	}
   192  }
   193  
   194  // parseQuery parses a field-separated symbol query, extracting the special
   195  // characters listed below, and returns a matcherFunc corresponding to the AND
   196  // of all field queries.
   197  //
   198  // Special characters:
   199  //   ^  match exact prefix
   200  //   $  match exact suffix
   201  //   '  match exact
   202  //
   203  // In all three of these special queries, matches are 'smart-cased', meaning
   204  // they are case sensitive if the symbol query contains any upper-case
   205  // characters, and case insensitive otherwise.
   206  func parseQuery(q string) matcherFunc {
   207  	fields := strings.Fields(q)
   208  	if len(fields) == 0 {
   209  		return func(string) float64 { return 0 }
   210  	}
   211  	var funcs []matcherFunc
   212  	for _, field := range fields {
   213  		var f matcherFunc
   214  		switch {
   215  		case strings.HasPrefix(field, "^"):
   216  			prefix := field[1:]
   217  			f = smartCase(prefix, func(s string) float64 {
   218  				if strings.HasPrefix(s, prefix) {
   219  					return 1
   220  				}
   221  				return 0
   222  			})
   223  		case strings.HasPrefix(field, "'"):
   224  			exact := field[1:]
   225  			f = smartCase(exact, func(s string) float64 {
   226  				if strings.Contains(s, exact) {
   227  					return 1
   228  				}
   229  				return 0
   230  			})
   231  		case strings.HasSuffix(field, "$"):
   232  			suffix := field[0 : len(field)-1]
   233  			f = smartCase(suffix, func(s string) float64 {
   234  				if strings.HasSuffix(s, suffix) {
   235  					return 1
   236  				}
   237  				return 0
   238  			})
   239  		default:
   240  			fm := fuzzy.NewMatcher(field)
   241  			f = func(s string) float64 {
   242  				return float64(fm.Score(s))
   243  			}
   244  		}
   245  		funcs = append(funcs, f)
   246  	}
   247  	return comboMatcher(funcs).match
   248  }
   249  
   250  // smartCase returns a matcherFunc that is case-sensitive if q contains any
   251  // upper-case characters, and case-insensitive otherwise.
   252  func smartCase(q string, m matcherFunc) matcherFunc {
   253  	insensitive := strings.ToLower(q) == q
   254  	return func(s string) float64 {
   255  		if insensitive {
   256  			s = strings.ToLower(s)
   257  		}
   258  		return m(s)
   259  	}
   260  }
   261  
   262  type comboMatcher []matcherFunc
   263  
   264  func (c comboMatcher) match(s string) float64 {
   265  	score := 1.0
   266  	for _, f := range c {
   267  		score *= f(s)
   268  	}
   269  	return score
   270  }
   271  
   272  // walk walks views, gathers symbols, and returns the results.
   273  func (sc *symbolCollector) walk(ctx context.Context, views []View) (_ []protocol.SymbolInformation, err error) {
   274  	toWalk, err := sc.collectPackages(ctx, views)
   275  	if err != nil {
   276  		return nil, err
   277  	}
   278  	// Make sure we only walk files once (we might see them more than once due to
   279  	// build constraints).
   280  	seen := make(map[span.URI]bool)
   281  	for _, pv := range toWalk {
   282  		sc.current = pv
   283  		for _, pgf := range pv.pkg.CompiledGoFiles() {
   284  			if seen[pgf.URI] {
   285  				continue
   286  			}
   287  			seen[pgf.URI] = true
   288  			sc.curFile = pgf
   289  			sc.walkFilesDecls(pgf.File.Decls)
   290  		}
   291  	}
   292  	return sc.results(), nil
   293  }
   294  
   295  func (sc *symbolCollector) results() []protocol.SymbolInformation {
   296  	var res []protocol.SymbolInformation
   297  	for _, si := range sc.res {
   298  		if si.score <= 0 {
   299  			return res
   300  		}
   301  		res = append(res, si.asProtocolSymbolInformation())
   302  	}
   303  	return res
   304  }
   305  
   306  // collectPackages gathers all known packages and sorts for stability.
   307  func (sc *symbolCollector) collectPackages(ctx context.Context, views []View) ([]*pkgView, error) {
   308  	var toWalk []*pkgView
   309  	for _, v := range views {
   310  		snapshot, release := v.Snapshot(ctx)
   311  		defer release()
   312  		knownPkgs, err := snapshot.KnownPackages(ctx)
   313  		if err != nil {
   314  			return nil, err
   315  		}
   316  		workspacePackages, err := snapshot.WorkspacePackages(ctx)
   317  		if err != nil {
   318  			return nil, err
   319  		}
   320  		isWorkspacePkg := make(map[Package]bool)
   321  		for _, wp := range workspacePackages {
   322  			isWorkspacePkg[wp] = true
   323  		}
   324  		for _, pkg := range knownPkgs {
   325  			toWalk = append(toWalk, &pkgView{
   326  				pkg:         pkg,
   327  				isWorkspace: isWorkspacePkg[pkg],
   328  			})
   329  		}
   330  	}
   331  	// Now sort for stability of results. We order by
   332  	// (pkgView.isWorkspace, pkgView.p.ID())
   333  	sort.Slice(toWalk, func(i, j int) bool {
   334  		lhs := toWalk[i]
   335  		rhs := toWalk[j]
   336  		switch {
   337  		case lhs.isWorkspace == rhs.isWorkspace:
   338  			return lhs.pkg.ID() < rhs.pkg.ID()
   339  		case lhs.isWorkspace:
   340  			return true
   341  		default:
   342  			return false
   343  		}
   344  	})
   345  	return toWalk, nil
   346  }
   347  
   348  func (sc *symbolCollector) walkFilesDecls(decls []ast.Decl) {
   349  	for _, decl := range decls {
   350  		switch decl := decl.(type) {
   351  		case *ast.FuncDecl:
   352  			kind := protocol.Function
   353  			var recv *ast.Ident
   354  			if decl.Recv.NumFields() > 0 {
   355  				kind = protocol.Method
   356  				recv = unpackRecv(decl.Recv.List[0].Type)
   357  			}
   358  			if recv != nil {
   359  				sc.match(decl.Name.Name, kind, decl.Name, recv)
   360  			} else {
   361  				sc.match(decl.Name.Name, kind, decl.Name)
   362  			}
   363  		case *ast.GenDecl:
   364  			for _, spec := range decl.Specs {
   365  				switch spec := spec.(type) {
   366  				case *ast.TypeSpec:
   367  					sc.match(spec.Name.Name, typeToKind(sc.current.pkg.GetTypesInfo().TypeOf(spec.Type)), spec.Name)
   368  					sc.walkType(spec.Type, spec.Name)
   369  				case *ast.ValueSpec:
   370  					for _, name := range spec.Names {
   371  						kind := protocol.Variable
   372  						if decl.Tok == token.CONST {
   373  							kind = protocol.Constant
   374  						}
   375  						sc.match(name.Name, kind, name)
   376  					}
   377  				}
   378  			}
   379  		}
   380  	}
   381  }
   382  
   383  func unpackRecv(rtyp ast.Expr) *ast.Ident {
   384  	// Extract the receiver identifier. Lifted from go/types/resolver.go
   385  L:
   386  	for {
   387  		switch t := rtyp.(type) {
   388  		case *ast.ParenExpr:
   389  			rtyp = t.X
   390  		case *ast.StarExpr:
   391  			rtyp = t.X
   392  		default:
   393  			break L
   394  		}
   395  	}
   396  	if name, _ := rtyp.(*ast.Ident); name != nil {
   397  		return name
   398  	}
   399  	return nil
   400  }
   401  
   402  // walkType processes symbols related to a type expression. path is path of
   403  // nested type identifiers to the type expression.
   404  func (sc *symbolCollector) walkType(typ ast.Expr, path ...*ast.Ident) {
   405  	switch st := typ.(type) {
   406  	case *ast.StructType:
   407  		for _, field := range st.Fields.List {
   408  			sc.walkField(field, protocol.Field, protocol.Field, path...)
   409  		}
   410  	case *ast.InterfaceType:
   411  		for _, field := range st.Methods.List {
   412  			sc.walkField(field, protocol.Interface, protocol.Method, path...)
   413  		}
   414  	}
   415  }
   416  
   417  // walkField processes symbols related to the struct field or interface method.
   418  //
   419  // unnamedKind and namedKind are the symbol kinds if the field is resp. unnamed
   420  // or named. path is the path of nested identifiers containing the field.
   421  func (sc *symbolCollector) walkField(field *ast.Field, unnamedKind, namedKind protocol.SymbolKind, path ...*ast.Ident) {
   422  	if len(field.Names) == 0 {
   423  		sc.match(types.ExprString(field.Type), unnamedKind, field, path...)
   424  	}
   425  	for _, name := range field.Names {
   426  		sc.match(name.Name, namedKind, name, path...)
   427  		sc.walkType(field.Type, append(path, name)...)
   428  	}
   429  }
   430  
   431  func typeToKind(typ types.Type) protocol.SymbolKind {
   432  	switch typ := typ.Underlying().(type) {
   433  	case *types.Interface:
   434  		return protocol.Interface
   435  	case *types.Struct:
   436  		return protocol.Struct
   437  	case *types.Signature:
   438  		if typ.Recv() != nil {
   439  			return protocol.Method
   440  		}
   441  		return protocol.Function
   442  	case *types.Named:
   443  		return typeToKind(typ.Underlying())
   444  	case *types.Basic:
   445  		i := typ.Info()
   446  		switch {
   447  		case i&types.IsNumeric != 0:
   448  			return protocol.Number
   449  		case i&types.IsBoolean != 0:
   450  			return protocol.Boolean
   451  		case i&types.IsString != 0:
   452  			return protocol.String
   453  		}
   454  	}
   455  	return protocol.Variable
   456  }
   457  
// match scores the symbol identified by name, kind and node using the
// symbolCollector's symbolizer and matcher, and records it in sc.res if its
// score is high enough to rank among the results collected so far.
//
// path specifies the identifier path to a nested field or interface method.
// When path is non-empty, the name that gets matched is the dot-joined path
// followed by name, and the symbol counts as unexported if any path element
// is unexported.
//
// Nodes with an invalid start or end position are skipped, as are symbols
// for which fileRange reports an error. No de-duplication is performed here.
func (sc *symbolCollector) match(name string, kind protocol.SymbolKind, node ast.Node, path ...*ast.Ident) {
	if !node.Pos().IsValid() || !node.End().IsValid() {
		return
	}

	// Note: this local shadows the package-level isExported function for the
	// rest of the method.
	isExported := isExported(name)
	if len(path) > 0 {
		// Build the qualified name "p1.p2.….name".
		var nameBuilder strings.Builder
		for _, ident := range path {
			nameBuilder.WriteString(ident.Name)
			nameBuilder.WriteString(".")
			if !ident.IsExported() {
				isExported = false
			}
		}
		nameBuilder.WriteString(name)
		name = nameBuilder.String()
	}

	// Factors to apply to the match score for the purpose of downranking
	// results.
	//
	// These numbers were crudely calibrated based on trial-and-error using a
	// small number of sample queries. Adjust as necessary.
	//
	// All factors are multiplicative, meaning if more than one applies they are
	// multiplied together.
	const (
		// nonWorkspaceFactor is applied to symbols outside of any active
		// workspace. Developers are less likely to want to jump to code that they
		// are not actively working on.
		nonWorkspaceFactor = 0.5
		// nonWorkspaceUnexportedFactor is applied to unexported symbols outside of
		// any active workspace. Since one wouldn't usually jump to unexported
		// symbols to understand a package API, they are particularly irrelevant.
		nonWorkspaceUnexportedFactor = 0.5
		// fieldFactor is applied to fields and interface methods. One would
		// typically jump to the type definition first, so ranking fields highly
		// can be noisy.
		fieldFactor = 0.5
	)
	symbol, score := sc.symbolizer(name, sc.current.pkg, sc.matcher)

	// Downrank symbols outside of the workspace.
	if !sc.current.isWorkspace {
		score *= nonWorkspaceFactor
		if !isExported {
			score *= nonWorkspaceUnexportedFactor
		}
	}

	// Downrank fields.
	if len(path) > 0 {
		score *= fieldFactor
	}

	// Avoid the work below if we know this score will not be sorted into the
	// results. Unused slots have a zero score, so this also rejects
	// non-matches (score <= 0).
	if score <= sc.res[len(sc.res)-1].score {
		return
	}

	rng, err := fileRange(sc.curFile, node.Pos(), node.End())
	if err != nil {
		return
	}
	si := symbolInformation{
		score:     score,
		name:      name,
		symbol:    symbol,
		container: sc.current.pkg.PkgPath(),
		kind:      kind,
		location: protocol.Location{
			URI:   protocol.URIFromSpanURI(sc.curFile.URI),
			Range: rng,
		},
	}
	// sc.res is kept in descending score order: find the first entry with a
	// strictly lower score, so equal scores keep their insertion order.
	insertAt := sort.Search(len(sc.res), func(i int) bool {
		return sc.res[i].score < score
	})
	// Shift the tail down by one slot, dropping the last (lowest) entry.
	if insertAt < len(sc.res)-1 {
		copy(sc.res[insertAt+1:], sc.res[insertAt:len(sc.res)-1])
	}
	sc.res[insertAt] = si
}
   548  
   549  func fileRange(pgf *ParsedGoFile, start, end token.Pos) (protocol.Range, error) {
   550  	s, err := span.FileSpan(pgf.Tok, pgf.Mapper.Converter, start, end)
   551  	if err != nil {
   552  		return protocol.Range{}, nil
   553  	}
   554  	return pgf.Mapper.Range(s)
   555  }
   556  
   557  // isExported reports if a token is exported. Copied from
   558  // token.IsExported (go1.13+).
   559  //
   560  // TODO: replace usage with token.IsExported once go1.12 is no longer
   561  // supported.
   562  func isExported(name string) bool {
   563  	ch, _ := utf8.DecodeRuneInString(name)
   564  	return unicode.IsUpper(ch)
   565  }
   566  
// pkgView holds information related to a package that we are going to walk.
type pkgView struct {
	// pkg is the package whose files will be walked.
	pkg         Package
	// isWorkspace records whether pkg was reported as a workspace package by
	// its snapshot; symbols from non-workspace packages are downranked.
	isWorkspace bool
}
   572  
// symbolInformation is a cut-down version of protocol.SymbolInformation that
// allows struct values of this type to be used as map keys.
type symbolInformation struct {
	// score is the final (downranked) match score used to order results.
	score     float64
	// name is the symbol's name, including any path of enclosing identifiers.
	name      string
	// symbol is the string produced by the symbolizer; it is what gets
	// reported to the client as the symbol's name.
	symbol    string
	// container is the path of the package declaring the symbol.
	container string
	kind      protocol.SymbolKind
	location  protocol.Location
}
   583  
   584  // asProtocolSymbolInformation converts s to a protocol.SymbolInformation value.
   585  //
   586  // TODO: work out how to handle tags if/when they are needed.
   587  func (s symbolInformation) asProtocolSymbolInformation() protocol.SymbolInformation {
   588  	return protocol.SymbolInformation{
   589  		Name:          s.symbol,
   590  		Kind:          s.kind,
   591  		Location:      s.location,
   592  		ContainerName: s.container,
   593  	}
   594  }