// Source: golang.org/x/tools/gopls@v0.15.3/internal/golang/workspace_symbol.go

// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package golang

import (
	"context"
	"fmt"
	"path/filepath"
	"runtime"
	"sort"
	"strings"
	"unicode"

	"golang.org/x/tools/gopls/internal/cache"
	"golang.org/x/tools/gopls/internal/cache/metadata"
	"golang.org/x/tools/gopls/internal/protocol"
	"golang.org/x/tools/gopls/internal/settings"
	"golang.org/x/tools/internal/event"
	"golang.org/x/tools/internal/fuzzy"
)

// maxSymbols defines the maximum number of symbol results that should ever be
// sent in response to a client.
const maxSymbols = 100

// WorkspaceSymbols matches symbols across all views using the given query,
// according to the match semantics parameterized by matcher and style.
//
// The workspace symbol method is defined in the spec as follows:
//
//	The workspace symbol request is sent from the client to the server to
//	list project-wide symbols matching the query string.
//
// It is unclear what "project-wide" means here, but given the parameters of
// workspace/symbol do not include any workspace identifier, then it has to be
// assumed that "project-wide" means "across all workspaces". Hence
// WorkspaceSymbols receives a slice of snapshots rather than a single one.
//
// However, it then becomes unclear what it would mean to call WorkspaceSymbols
// with a different configured SymbolMatcher per View. Therefore we assume that
// Session level configuration will define the SymbolMatcher to be used for the
// WorkspaceSymbols method.
//
// An empty query yields no results and no error.
45 func WorkspaceSymbols(ctx context.Context, matcher settings.SymbolMatcher, style settings.SymbolStyle, snapshots []*cache.Snapshot, query string) ([]protocol.SymbolInformation, error) { 46 ctx, done := event.Start(ctx, "golang.WorkspaceSymbols") 47 defer done() 48 if query == "" { 49 return nil, nil 50 } 51 52 var s symbolizer 53 switch style { 54 case settings.DynamicSymbols: 55 s = dynamicSymbolMatch 56 case settings.FullyQualifiedSymbols: 57 s = fullyQualifiedSymbolMatch 58 case settings.PackageQualifiedSymbols: 59 s = packageSymbolMatch 60 default: 61 panic(fmt.Errorf("unknown symbol style: %v", style)) 62 } 63 64 return collectSymbols(ctx, snapshots, matcher, s, query) 65 } 66 67 // A matcherFunc returns the index and score of a symbol match. 68 // 69 // See the comment for symbolCollector for more information. 70 type matcherFunc func(chunks []string) (int, float64) 71 72 // A symbolizer returns the best symbol match for a name with pkg, according to 73 // some heuristic. The symbol name is passed as the slice nameParts of logical 74 // name pieces. For example, for myType.field the caller can pass either 75 // []string{"myType.field"} or []string{"myType.", "field"}. 76 // 77 // See the comment for symbolCollector for more information. 78 // 79 // The space argument is an empty slice with spare capacity that may be used 80 // to allocate the result. 
81 type symbolizer func(space []string, name string, pkg *metadata.Package, m matcherFunc) ([]string, float64) 82 83 func fullyQualifiedSymbolMatch(space []string, name string, pkg *metadata.Package, matcher matcherFunc) ([]string, float64) { 84 if _, score := dynamicSymbolMatch(space, name, pkg, matcher); score > 0 { 85 return append(space, string(pkg.PkgPath), ".", name), score 86 } 87 return nil, 0 88 } 89 90 func dynamicSymbolMatch(space []string, name string, pkg *metadata.Package, matcher matcherFunc) ([]string, float64) { 91 if metadata.IsCommandLineArguments(pkg.ID) { 92 // command-line-arguments packages have a non-sensical package path, so 93 // just use their package name. 94 return packageSymbolMatch(space, name, pkg, matcher) 95 } 96 97 var score float64 98 99 endsInPkgName := strings.HasSuffix(string(pkg.PkgPath), string(pkg.Name)) 100 101 // If the package path does not end in the package name, we need to check the 102 // package-qualified symbol as an extra pass first. 103 if !endsInPkgName { 104 pkgQualified := append(space, string(pkg.Name), ".", name) 105 idx, score := matcher(pkgQualified) 106 nameStart := len(pkg.Name) + 1 107 if score > 0 { 108 // If our match is contained entirely within the unqualified portion, 109 // just return that. 110 if idx >= nameStart { 111 return append(space, name), score 112 } 113 // Lower the score for matches that include the package name. 114 return pkgQualified, score * 0.8 115 } 116 } 117 118 // Now try matching the fully qualified symbol. 119 fullyQualified := append(space, string(pkg.PkgPath), ".", name) 120 idx, score := matcher(fullyQualified) 121 122 // As above, check if we matched just the unqualified symbol name. 123 nameStart := len(pkg.PkgPath) + 1 124 if idx >= nameStart { 125 return append(space, name), score 126 } 127 128 // If our package path ends in the package name, we'll have skipped the 129 // initial pass above, so check if we matched just the package-qualified 130 // name. 
131 if endsInPkgName && idx >= 0 { 132 pkgStart := len(pkg.PkgPath) - len(pkg.Name) 133 if idx >= pkgStart { 134 return append(space, string(pkg.Name), ".", name), score 135 } 136 } 137 138 // Our match was not contained within the unqualified or package qualified 139 // symbol. Return the fully qualified symbol but discount the score. 140 return fullyQualified, score * 0.6 141 } 142 143 func packageSymbolMatch(space []string, name string, pkg *metadata.Package, matcher matcherFunc) ([]string, float64) { 144 qualified := append(space, string(pkg.Name), ".", name) 145 if _, s := matcher(qualified); s > 0 { 146 return qualified, s 147 } 148 return nil, 0 149 } 150 151 func buildMatcher(matcher settings.SymbolMatcher, query string) matcherFunc { 152 switch matcher { 153 case settings.SymbolFuzzy: 154 return parseQuery(query, newFuzzyMatcher) 155 case settings.SymbolFastFuzzy: 156 return parseQuery(query, func(query string) matcherFunc { 157 return fuzzy.NewSymbolMatcher(query).Match 158 }) 159 case settings.SymbolCaseSensitive: 160 return matchExact(query) 161 case settings.SymbolCaseInsensitive: 162 q := strings.ToLower(query) 163 exact := matchExact(q) 164 wrapper := []string{""} 165 return func(chunks []string) (int, float64) { 166 s := strings.Join(chunks, "") 167 wrapper[0] = strings.ToLower(s) 168 return exact(wrapper) 169 } 170 } 171 panic(fmt.Errorf("unknown symbol matcher: %v", matcher)) 172 } 173 174 func newFuzzyMatcher(query string) matcherFunc { 175 fm := fuzzy.NewMatcher(query) 176 return func(chunks []string) (int, float64) { 177 score := float64(fm.ScoreChunks(chunks)) 178 ranges := fm.MatchedRanges() 179 if len(ranges) > 0 { 180 return ranges[0], score 181 } 182 return -1, score 183 } 184 } 185 186 // parseQuery parses a field-separated symbol query, extracting the special 187 // characters listed below, and returns a matcherFunc corresponding to the AND 188 // of all field queries. 
189 // 190 // Special characters: 191 // 192 // ^ match exact prefix 193 // $ match exact suffix 194 // ' match exact 195 // 196 // In all three of these special queries, matches are 'smart-cased', meaning 197 // they are case sensitive if the symbol query contains any upper-case 198 // characters, and case insensitive otherwise. 199 func parseQuery(q string, newMatcher func(string) matcherFunc) matcherFunc { 200 fields := strings.Fields(q) 201 if len(fields) == 0 { 202 return func([]string) (int, float64) { return -1, 0 } 203 } 204 var funcs []matcherFunc 205 for _, field := range fields { 206 var f matcherFunc 207 switch { 208 case strings.HasPrefix(field, "^"): 209 prefix := field[1:] 210 f = smartCase(prefix, func(chunks []string) (int, float64) { 211 s := strings.Join(chunks, "") 212 if strings.HasPrefix(s, prefix) { 213 return 0, 1 214 } 215 return -1, 0 216 }) 217 case strings.HasPrefix(field, "'"): 218 exact := field[1:] 219 f = smartCase(exact, matchExact(exact)) 220 case strings.HasSuffix(field, "$"): 221 suffix := field[0 : len(field)-1] 222 f = smartCase(suffix, func(chunks []string) (int, float64) { 223 s := strings.Join(chunks, "") 224 if strings.HasSuffix(s, suffix) { 225 return len(s) - len(suffix), 1 226 } 227 return -1, 0 228 }) 229 default: 230 f = newMatcher(field) 231 } 232 funcs = append(funcs, f) 233 } 234 if len(funcs) == 1 { 235 return funcs[0] 236 } 237 return comboMatcher(funcs).match 238 } 239 240 func matchExact(exact string) matcherFunc { 241 return func(chunks []string) (int, float64) { 242 s := strings.Join(chunks, "") 243 if idx := strings.LastIndex(s, exact); idx >= 0 { 244 return idx, 1 245 } 246 return -1, 0 247 } 248 } 249 250 // smartCase returns a matcherFunc that is case-sensitive if q contains any 251 // upper-case characters, and case-insensitive otherwise. 
252 func smartCase(q string, m matcherFunc) matcherFunc { 253 insensitive := strings.ToLower(q) == q 254 wrapper := []string{""} 255 return func(chunks []string) (int, float64) { 256 s := strings.Join(chunks, "") 257 if insensitive { 258 s = strings.ToLower(s) 259 } 260 wrapper[0] = s 261 return m(wrapper) 262 } 263 } 264 265 type comboMatcher []matcherFunc 266 267 func (c comboMatcher) match(chunks []string) (int, float64) { 268 score := 1.0 269 first := 0 270 for _, f := range c { 271 idx, s := f(chunks) 272 if idx < first { 273 first = idx 274 } 275 score *= s 276 } 277 return first, score 278 } 279 280 // collectSymbols calls snapshot.Symbols to walk the syntax trees of 281 // all files in the views' current snapshots, and returns a sorted, 282 // scored list of symbols that best match the parameters. 283 // 284 // How it matches symbols is parameterized by two interfaces: 285 // - A matcherFunc determines how well a string symbol matches a query. It 286 // returns a non-negative score indicating the quality of the match. A score 287 // of zero indicates no match. 288 // - A symbolizer determines how we extract the symbol for an object. This 289 // enables the 'symbolStyle' configuration option. 290 func collectSymbols(ctx context.Context, snapshots []*cache.Snapshot, matcherType settings.SymbolMatcher, symbolizer symbolizer, query string) ([]protocol.SymbolInformation, error) { 291 // Extract symbols from all files. 292 var work []symbolFile 293 var roots []string 294 seen := make(map[protocol.DocumentURI]bool) 295 // TODO(adonovan): opt: parallelize this loop? How often is len > 1? 296 for _, snapshot := range snapshots { 297 // Use the root view URIs for determining (lexically) 298 // whether a URI is in any open workspace. 
299 folderURI := snapshot.Folder() 300 roots = append(roots, strings.TrimRight(string(folderURI), "/")) 301 302 filters := snapshot.Options().DirectoryFilters 303 filterer := cache.NewFilterer(filters) 304 folder := filepath.ToSlash(folderURI.Path()) 305 306 workspaceOnly := true 307 if snapshot.Options().SymbolScope == settings.AllSymbolScope { 308 workspaceOnly = false 309 } 310 symbols, err := snapshot.Symbols(ctx, workspaceOnly) 311 if err != nil { 312 return nil, err 313 } 314 315 for uri, syms := range symbols { 316 norm := filepath.ToSlash(uri.Path()) 317 nm := strings.TrimPrefix(norm, folder) 318 if filterer.Disallow(nm) { 319 continue 320 } 321 // Only scan each file once. 322 if seen[uri] { 323 continue 324 } 325 meta, err := NarrowestMetadataForFile(ctx, snapshot, uri) 326 if err != nil { 327 event.Error(ctx, fmt.Sprintf("missing metadata for %q", uri), err) 328 continue 329 } 330 seen[uri] = true 331 work = append(work, symbolFile{uri, meta, syms}) 332 } 333 } 334 335 // Match symbols in parallel. 336 // Each worker has its own symbolStore, 337 // which we merge at the end. 338 nmatchers := runtime.GOMAXPROCS(-1) // matching is CPU bound 339 results := make(chan *symbolStore) 340 for i := 0; i < nmatchers; i++ { 341 go func(i int) { 342 matcher := buildMatcher(matcherType, query) 343 store := new(symbolStore) 344 // Assign files to workers in round-robin fashion. 345 for j := i; j < len(work); j += nmatchers { 346 matchFile(store, symbolizer, matcher, roots, work[j]) 347 } 348 results <- store 349 }(i) 350 } 351 352 // Gather and merge results as they arrive. 353 var unified symbolStore 354 for i := 0; i < nmatchers; i++ { 355 store := <-results 356 for _, syms := range store.res { 357 unified.store(syms) 358 } 359 } 360 return unified.results(), nil 361 } 362 363 // symbolFile holds symbol information for a single file. 
364 type symbolFile struct { 365 uri protocol.DocumentURI 366 mp *metadata.Package 367 syms []cache.Symbol 368 } 369 370 // matchFile scans a symbol file and adds matching symbols to the store. 371 func matchFile(store *symbolStore, symbolizer symbolizer, matcher matcherFunc, roots []string, i symbolFile) { 372 space := make([]string, 0, 3) 373 for _, sym := range i.syms { 374 symbolParts, score := symbolizer(space, sym.Name, i.mp, matcher) 375 376 // Check if the score is too low before applying any downranking. 377 if store.tooLow(score) { 378 continue 379 } 380 381 // Factors to apply to the match score for the purpose of downranking 382 // results. 383 // 384 // These numbers were crudely calibrated based on trial-and-error using a 385 // small number of sample queries. Adjust as necessary. 386 // 387 // All factors are multiplicative, meaning if more than one applies they are 388 // multiplied together. 389 const ( 390 // nonWorkspaceFactor is applied to symbols outside the workspace. 391 // Developers are less likely to want to jump to code that they 392 // are not actively working on. 393 nonWorkspaceFactor = 0.5 394 // nonWorkspaceUnexportedFactor is applied to unexported symbols outside 395 // the workspace. Since one wouldn't usually jump to unexported 396 // symbols to understand a package API, they are particularly irrelevant. 397 nonWorkspaceUnexportedFactor = 0.5 398 // every field or method nesting level to access the field decreases 399 // the score by a factor of 1.0 - depth*depthFactor, up to a depth of 400 // 3. 401 // 402 // Use a small constant here, as this exists mostly to break ties 403 // (e.g. given a type Foo and a field x.Foo, prefer Foo). 404 depthFactor = 0.01 405 ) 406 407 startWord := true 408 exported := true 409 depth := 0.0 410 for _, r := range sym.Name { 411 if startWord && !unicode.IsUpper(r) { 412 exported = false 413 } 414 if r == '.' 
{ 415 startWord = true 416 depth++ 417 } else { 418 startWord = false 419 } 420 } 421 422 // TODO(rfindley): use metadata to determine if the file is in a workspace 423 // package, rather than this heuristic. 424 inWorkspace := false 425 for _, root := range roots { 426 if strings.HasPrefix(string(i.uri), root) { 427 inWorkspace = true 428 break 429 } 430 } 431 432 // Apply downranking based on workspace position. 433 if !inWorkspace { 434 score *= nonWorkspaceFactor 435 if !exported { 436 score *= nonWorkspaceUnexportedFactor 437 } 438 } 439 440 // Apply downranking based on symbol depth. 441 if depth > 3 { 442 depth = 3 443 } 444 score *= 1.0 - depth*depthFactor 445 446 if store.tooLow(score) { 447 continue 448 } 449 450 si := symbolInformation{ 451 score: score, 452 symbol: strings.Join(symbolParts, ""), 453 kind: sym.Kind, 454 uri: i.uri, 455 rng: sym.Range, 456 container: string(i.mp.PkgPath), 457 } 458 store.store(si) 459 } 460 } 461 462 type symbolStore struct { 463 res [maxSymbols]symbolInformation 464 } 465 466 // store inserts si into the sorted results, if si has a high enough score. 467 func (sc *symbolStore) store(si symbolInformation) { 468 if sc.tooLow(si.score) { 469 return 470 } 471 insertAt := sort.Search(len(sc.res), func(i int) bool { 472 // Sort by score, then symbol length, and finally lexically. 
473 if sc.res[i].score != si.score { 474 return sc.res[i].score < si.score 475 } 476 if len(sc.res[i].symbol) != len(si.symbol) { 477 return len(sc.res[i].symbol) > len(si.symbol) 478 } 479 return sc.res[i].symbol > si.symbol 480 }) 481 if insertAt < len(sc.res)-1 { 482 copy(sc.res[insertAt+1:], sc.res[insertAt:len(sc.res)-1]) 483 } 484 sc.res[insertAt] = si 485 } 486 487 func (sc *symbolStore) tooLow(score float64) bool { 488 return score <= sc.res[len(sc.res)-1].score 489 } 490 491 func (sc *symbolStore) results() []protocol.SymbolInformation { 492 var res []protocol.SymbolInformation 493 for _, si := range sc.res { 494 if si.score <= 0 { 495 return res 496 } 497 res = append(res, si.asProtocolSymbolInformation()) 498 } 499 return res 500 } 501 502 // symbolInformation is a cut-down version of protocol.SymbolInformation that 503 // allows struct values of this type to be used as map keys. 504 type symbolInformation struct { 505 score float64 506 symbol string 507 container string 508 kind protocol.SymbolKind 509 uri protocol.DocumentURI 510 rng protocol.Range 511 } 512 513 // asProtocolSymbolInformation converts s to a protocol.SymbolInformation value. 514 // 515 // TODO: work out how to handle tags if/when they are needed. 516 func (s symbolInformation) asProtocolSymbolInformation() protocol.SymbolInformation { 517 return protocol.SymbolInformation{ 518 Name: s.symbol, 519 Kind: s.kind, 520 Location: protocol.Location{ 521 URI: s.uri, 522 Range: s.rng, 523 }, 524 ContainerName: s.container, 525 } 526 }