kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/services/link/link.go (about)

     1  /*
     2   * Copyright 2018 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  // Package link implements the link resolver service.
    18  package link // import "kythe.io/kythe/go/services/link"
    19  
    20  import (
    21  	"context"
    22  	"fmt"
    23  	"regexp"
    24  	"sort"
    25  	"time"
    26  
    27  	"kythe.io/kythe/go/util/kytheuri"
    28  	"kythe.io/kythe/go/util/log"
    29  	"kythe.io/kythe/go/util/schema/edges"
    30  	"kythe.io/kythe/go/util/schema/facts"
    31  
    32  	"bitbucket.org/creachadair/stringset"
    33  	"golang.org/x/sync/errgroup"
    34  	"golang.org/x/sync/semaphore"
    35  	"google.golang.org/grpc/codes"
    36  	"google.golang.org/grpc/status"
    37  	"google.golang.org/protobuf/encoding/prototext"
    38  	"google.golang.org/protobuf/proto"
    39  
    40  	ipb "kythe.io/kythe/proto/identifier_go_proto"
    41  	linkpb "kythe.io/kythe/proto/link_go_proto"
    42  	xpb "kythe.io/kythe/proto/xref_go_proto"
    43  )
    44  
// A Resolver implements the link service resolver by dispatching to a Kythe
// XRefService and IdentifierService to resolve qualified names.
type Resolver struct {
	// Client is the backend that Resolve queries. Find is called to look up
	// the nodes matching the requested identifier, and CrossReferences is
	// called to fetch the definitions of those nodes.
	Client interface {
		CrossReferences(context.Context, *xpb.CrossReferencesRequest) (*xpb.CrossReferencesReply, error)
		Find(context.Context, *ipb.FindRequest) (*ipb.FindReply, error)
	}
}
    53  
    54  // Resolve implements the internals of the resolve method.
    55  func (s *Resolver) Resolve(ctx context.Context, req *linkpb.LinkRequest) (*linkpb.LinkReply, error) {
    56  	if req.Identifier == "" {
    57  		return nil, status.Error(codes.InvalidArgument, "missing link identifier")
    58  	}
    59  	include, err := compileLocation(req.Include, true)
    60  	if err != nil {
    61  		return nil, status.Errorf(codes.InvalidArgument, "include: %v", err)
    62  	}
    63  	exclude, err := compileLocation(req.Exclude, false)
    64  	if err != nil {
    65  		return nil, status.Errorf(codes.InvalidArgument, "exclude: %v", err)
    66  	}
    67  
    68  	// Stage 1: Resolve identifiers.
    69  	ireq := &ipb.FindRequest{
    70  		Identifier: req.Identifier,
    71  		Corpus:     req.Corpus,
    72  		Languages:  req.Language,
    73  	}
    74  	ids, err := s.Client.Find(ctx, ireq)
    75  	if err != nil {
    76  		return nil, err
    77  	}
    78  
    79  	idMatches := make(map[string]*ipb.FindReply_Match)
    80  	for _, m := range ids.Matches {
    81  		if !kindMatches(m, req.NodeKind) {
    82  			continue
    83  		}
    84  		idMatches[m.Ticket] = m
    85  	}
    86  
    87  	const maxMatches = 1000
    88  	if len(idMatches) == 0 {
    89  		return nil, status.Error(codes.NotFound, "no matches")
    90  	} else if len(idMatches) > maxMatches {
    91  		return nil, status.Errorf(codes.OutOfRange, "too many identifier matches (%d > %d)",
    92  			len(idMatches), maxMatches/2) // a comforting deceit
    93  	}
    94  	log.InfoContextf(ctx, "Found %d of %d matches for identifier %q",
    95  		len(idMatches), len(ids.Matches), req.Identifier)
    96  
    97  	// Stage 2: Find definitions of the matching nodes.
    98  	xreq := &xpb.CrossReferencesRequest{
    99  		Ticket:          stringset.FromKeys(idMatches).Unordered(),
   100  		DefinitionKind:  xpb.CrossReferencesRequest_BINDING_DEFINITIONS,
   101  		Snippets:        xpb.SnippetsKind_NONE,
   102  		Filter:          []string{facts.NodeKind, facts.Complete},
   103  		NodeDefinitions: true,
   104  	}
   105  	switch req.DefinitionKind {
   106  	case linkpb.LinkRequest_FULL:
   107  		xreq.DefinitionKind = xpb.CrossReferencesRequest_FULL_DEFINITIONS
   108  	case linkpb.LinkRequest_ANY:
   109  		xreq.DefinitionKind = xpb.CrossReferencesRequest_ALL_DEFINITIONS
   110  	case linkpb.LinkRequest_BINDING:
   111  	default:
   112  		log.WarningContextf(ctx, "Unknown definition kind %v (ignored)", req.DefinitionKind)
   113  	}
   114  	log.Info("Cross-references request:\n", prototext.Format(xreq))
   115  	defs, err := s.crossRefs(ctx, xreq)
   116  	if err != nil {
   117  		return nil, err
   118  	}
   119  	log.InfoContextf(ctx, "Found %d result sets", len(defs.CrossReferences))
   120  
   121  	// Gather all the anchors matching each definition. Also check whether any
   122  	// of the tickets is a complete definition, since that is preferred.
   123  	var numAnchors int
   124  	anchors := make(map[string][]*xpb.Anchor)
   125  	complete := stringset.New()
   126  	pref := false
   127  	for ticket, xrefs := range defs.CrossReferences {
   128  		log.InfoContextf(ctx, "Checking node %q...", ticket)
   129  
   130  		if req.Params != nil {
   131  			// Count parameters, and filter based on that.
   132  			nparams := 0
   133  			for _, rel := range xrefs.RelatedNode {
   134  				n := int(rel.Ordinal) + 1
   135  				if rel.RelationKind == edges.Param {
   136  					if n > nparams {
   137  						nparams = n
   138  					}
   139  				}
   140  			}
   141  			log.InfoContextf(ctx, "+ Node has %d parameters", nparams)
   142  			if n := int(req.Params.GetCount()); n != nparams {
   143  				log.InfoContextf(ctx, "- Wrong number of parameters (have %d, want %d)", nparams, n)
   144  				continue
   145  			}
   146  		}
   147  
   148  		// Check for complete definitions.
   149  		if comp, ok := defs.Nodes[ticket].GetFacts()[facts.Complete]; ok {
   150  			switch s := string(comp); s {
   151  			case "definition":
   152  				if !pref {
   153  					complete = stringset.New()
   154  					pref = true
   155  				}
   156  				complete.Add(ticket)
   157  				log.Info("+ Node is a preferred complete definition")
   158  			case "complete":
   159  				if !pref {
   160  					complete.Add(ticket)
   161  					log.Info("+ Node is a complete definition")
   162  				}
   163  			}
   164  		}
   165  		anchors[ticket] = findAnchors(ticket, defs, include, exclude)
   166  		numAnchors += len(anchors[ticket])
   167  	}
   168  
   169  	// If we do have any complete definitions, throw out all the others.
   170  	if len(complete) != 0 {
   171  		for ticket := range anchors {
   172  			if !complete.Contains(ticket) {
   173  				log.InfoContextf(ctx, "- Discarding incomplete definition %q", ticket)
   174  				delete(anchors, ticket)
   175  			}
   176  		}
   177  	}
   178  
   179  	// Stage 3: Filter the definitions by location. We have to do a little
   180  	// dance here to correctly deal with nodes whose definitions we share from
   181  	// code generation.
   182  	type result struct {
   183  		nodes stringset.Set // semantic node tickets defined here
   184  		link  *linkpb.Link  // the link message for the reply
   185  	}
   186  	type fileKey struct {
   187  		file string
   188  		line int32
   189  	}
   190  	seen := make(map[fileKey]*result)
   191  	for ticket, anchors := range anchors {
   192  		for _, anchor := range anchors {
   193  			// Record one link for each distinct location. We prefer location to
   194  			// anchor because there may be multiple anchors spanning the same
   195  			// location. We keep track of the semantic node tickets along the way,
   196  			// since the request may desire them.
   197  			key := fileKey{
   198  				file: anchor.Parent,
   199  				line: anchor.Span.Start.GetLineNumber(),
   200  			}
   201  			if res, ok := seen[key]; ok {
   202  				res.nodes.Add(ticket)
   203  			} else {
   204  				link := &linkpb.Link{
   205  					FileTicket: anchor.Parent,
   206  					Span:       anchor.Span,
   207  				}
   208  				seen[key] = &result{
   209  					nodes: stringset.New(ticket),
   210  					link:  link,
   211  				}
   212  			}
   213  		}
   214  	}
   215  
   216  	log.InfoContextf(ctx, "After filtering %d anchor locations there are %d unique results",
   217  		numAnchors, len(seen))
   218  	if len(seen) == 0 {
   219  		return nil, status.Error(codes.NotFound, "no matching definitions")
   220  	}
   221  
   222  	// Populate the links in the result, and order them for stability.
   223  	rsp := new(linkpb.LinkReply)
   224  	for _, res := range seen {
   225  		if req.IncludeNodes {
   226  			for _, ticket := range res.nodes.Elements() {
   227  				m := idMatches[ticket]
   228  				res.link.Nodes = append(res.link.Nodes, &linkpb.Link_Node{
   229  					Ticket:     ticket,
   230  					BaseName:   m.GetBaseName(),
   231  					Identifier: m.GetQualifiedName(),
   232  				})
   233  			}
   234  		}
   235  		rsp.Links = append(rsp.Links, res.link)
   236  		log.InfoContextf(ctx, "Result: %+v", res.link)
   237  	}
   238  	sort.Slice(rsp.Links, func(i, j int) bool {
   239  		return rsp.Links[i].FileTicket < rsp.Links[j].FileTicket
   240  	})
   241  
   242  	return rsp, nil
   243  }
   244  
   245  // kindMatches reports whether the kind and subkind of m match any of the
   246  // entries in kinds, which have the form "kind" or "kind/subkind".
   247  func kindMatches(m *ipb.FindReply_Match, kinds []string) bool {
   248  	if len(kinds) == 0 {
   249  		return m.NodeKind != "lookup"
   250  	}
   251  	key := m.NodeKind
   252  	if sk := m.NodeSubkind; sk != "" {
   253  		key += "/" + sk
   254  	}
   255  	return stringset.Index(m.NodeKind, kinds) >= 0 || stringset.Index(key, kinds) >= 0
   256  }
   257  
   258  // crossRefs calls XRefService.CrossReferences with the given request, issuing
   259  // one request per ticket in parallel. The API allows multiple tickets per
   260  // request, but server-side merging can get confused if we pass in related
   261  // tickets. The results are merged locally, which is safe.
   262  func (s *Resolver) crossRefs(ctx context.Context, req *xpb.CrossReferencesRequest) (_ *xpb.CrossReferencesReply, err error) {
   263  	start := time.Now()
   264  	defer func() { log.InfoContextf(ctx, "CrossReferences complete err=%v [%v elapsed]", err, time.Since(start)) }()
   265  
   266  	reqs := make([]*xpb.CrossReferencesRequest, 0, len(req.Ticket))
   267  	for _, ticket := range req.Ticket {
   268  		next := proto.Clone(req).(*xpb.CrossReferencesRequest)
   269  		next.Ticket = []string{ticket}
   270  		reqs = append(reqs, next)
   271  	}
   272  	rsps := make([]*xpb.CrossReferencesReply, len(req.Ticket))
   273  	sem := semaphore.NewWeighted(32)
   274  	g, gctx := errgroup.WithContext(ctx)
   275  	for i, req := range reqs {
   276  		i, req := i, req
   277  		if sem.Acquire(gctx, 1) != nil {
   278  			break
   279  		}
   280  		g.Go(func() error {
   281  			defer sem.Release(1)
   282  			var err error
   283  			rsps[i], err = s.Client.CrossReferences(gctx, req)
   284  			return err
   285  		})
   286  	}
   287  	if err := g.Wait(); err != nil {
   288  		return nil, err
   289  	}
   290  	for _, rsp := range rsps[1:] {
   291  		proto.Merge(rsps[0], rsp)
   292  	}
   293  	return rsps[0], nil
   294  }
   295  
// A matcher reports whether a parsed Kythe URI is selected by a location
// filter, as built by compileLocation.
type matcher func(*kytheuri.URI) bool
   297  
   298  // compileLocation returns a matching function from the specified location.
   299  // The function returns keep for all inputs if if loc == nil.
   300  func compileLocation(locs []*linkpb.LinkRequest_Location, keep bool) (matcher, error) {
   301  	if len(locs) == 0 {
   302  		return func(*kytheuri.URI) bool { return keep }, nil
   303  	}
   304  	var matchPath, matchRoot []func(string) bool
   305  	corpora := stringset.New()
   306  	for _, loc := range locs {
   307  		if r, err := regexp.Compile(loc.Path); err == nil {
   308  			matchPath = append(matchPath, r.MatchString)
   309  		} else {
   310  			return nil, fmt.Errorf("path regexp: %v", err)
   311  		}
   312  		if r, err := regexp.Compile(loc.Root); err == nil {
   313  			matchRoot = append(matchRoot, r.MatchString)
   314  		} else {
   315  			return nil, fmt.Errorf("root regexp: %v", err)
   316  		}
   317  		if loc.Corpus != "" {
   318  			corpora.Add(loc.Corpus)
   319  		}
   320  	}
   321  	matchCorpus := func(c string) bool { return corpora.Len() == 0 || corpora.Contains(c) }
   322  	return func(u *kytheuri.URI) bool {
   323  		for i := 0; i < len(matchPath); i++ {
   324  			if matchPath[i](u.Path) && matchRoot[i](u.Root) && matchCorpus(u.Corpus) {
   325  				return true
   326  			}
   327  		}
   328  		return false
   329  	}, nil
   330  }
   331  
   332  // findAnchors locates valid anchors for the given ticket in rsp, according to
   333  // the include and exclude rules, as well as ticket language and corpus.
   334  func findAnchors(ticket string, rsp *xpb.CrossReferencesReply, include, exclude matcher) []*xpb.Anchor {
   335  	t, err := kytheuri.Parse(ticket)
   336  	if err != nil {
   337  		log.Errorf("Invalid ticket %q: %v", ticket, err)
   338  		return nil
   339  	}
   340  	check := func(ticket string) bool {
   341  		if a, err := kytheuri.Parse(ticket); err != nil {
   342  			log.Infof("- Invalid ticket %s: %v", ticket, err)
   343  		} else if t.Language != "" && a.Language != t.Language {
   344  			log.Infof("- Language mismatch (%q ≠ %q)", a.Language, t.Language)
   345  		} else if !include(a) || exclude(a) {
   346  			log.Infof("- Filter mismatch %s", ticket)
   347  		} else {
   348  			return true
   349  		}
   350  		return false
   351  	}
   352  
   353  	// If there is a single direct definition, it supersedes all other options.
   354  	if node, ok := rsp.Nodes[ticket]; ok && node.Definition != "" {
   355  		if anchor, ok := rsp.DefinitionLocations[node.Definition]; ok {
   356  			if check(anchor.Ticket) {
   357  				log.Infof("+ Found matching definition anchor: %s", anchor.Ticket)
   358  				return []*xpb.Anchor{anchor}
   359  			}
   360  		}
   361  	}
   362  
   363  	// Look for a definition among the possibly-clustered hoi polloi.
   364  	var result []*xpb.Anchor
   365  	for _, def := range rsp.CrossReferences[ticket].GetDefinition() {
   366  		anchor := def.Anchor
   367  		if !edges.IsVariant(anchor.Kind, edges.Defines) {
   368  			continue // not a definition
   369  		}
   370  		if check(anchor.Ticket) {
   371  			log.Infof("+ Found matching anchor: %s", anchor.Ticket)
   372  			result = append(result, anchor)
   373  		}
   374  	}
   375  
   376  	return result
   377  }