kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/serving/graph/graph.go (about)

     1  /*
     2   * Copyright 2017 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  // Package graph provides a high-performance table-based implementation of the
    18  // graph.Service.
    19  //
    20  // Table format:
    21  //
    22  //	edgeSets:<ticket>      -> srvpb.PagedEdgeSet
    23  //	edgePages:<page_key>   -> srvpb.EdgePage
    24  package graph // import "kythe.io/kythe/go/serving/graph"
    25  
    26  import (
    27  	"context"
    28  	"encoding/base64"
    29  	"fmt"
    30  	"regexp"
    31  	"strings"
    32  
    33  	"kythe.io/kythe/go/services/xrefs"
    34  	"kythe.io/kythe/go/storage/table"
    35  	"kythe.io/kythe/go/util/log"
    36  
    37  	"bitbucket.org/creachadair/stringset"
    38  	"golang.org/x/net/trace"
    39  	"google.golang.org/protobuf/proto"
    40  
    41  	cpb "kythe.io/kythe/proto/common_go_proto"
    42  	gpb "kythe.io/kythe/proto/graph_go_proto"
    43  	ipb "kythe.io/kythe/proto/internal_go_proto"
    44  	srvpb "kythe.io/kythe/proto/serving_go_proto"
    45  )
    46  
    47  func tracePrintf(ctx context.Context, msg string, args ...any) {
    48  	if t, ok := trace.FromContext(ctx); ok {
    49  		t.LazyPrintf(msg, args...)
    50  	}
    51  }
    52  
    53  func nodeToInfo(patterns []*regexp.Regexp, n *srvpb.Node) *cpb.NodeInfo {
    54  	ni := &cpb.NodeInfo{Facts: make(map[string][]byte, len(n.Fact))}
    55  	for _, f := range n.Fact {
    56  		if xrefs.MatchesAny(f.Name, patterns) {
    57  			ni.Facts[f.Name] = f.Value
    58  		}
    59  	}
    60  	if len(ni.Facts) == 0 {
    61  		return nil
    62  	}
    63  	return ni
    64  }
    65  
// Key prefixes for the combinedTable implementation.
const (
	// edgeSetsTablePrefix is prepended to a source ticket by EdgeSetKey.
	edgeSetsTablePrefix  = "edgeSets:"
	// edgePagesTablePrefix is prepended to a page key by EdgePageKey.
	edgePagesTablePrefix = "edgePages:"
)
    71  
// edgeSetResult is one element of the stream produced by lookupPagedEdgeSets:
// either a PagedEdgeSet that was read successfully or the error encountered
// while reading it.
type edgeSetResult struct {
	// PagedEdgeSet is the decoded table value; it is left nil when Err is set.
	PagedEdgeSet *srvpb.PagedEdgeSet

	// Err is table.ErrNoSuchKey when the key is absent from the table, or a
	// descriptive lookup error for any other failure.
	Err error
}
    77  
// staticLookupTables abstracts the table reads that Table needs, so the split
// and combined table layouts can share the same service logic.
type staticLookupTables interface {
	// pagedEdgeSets streams the PagedEdgeSet lookup results for tickets.
	pagedEdgeSets(ctx context.Context, tickets []string) (<-chan edgeSetResult, error)
	// edgePage reads the EdgePage stored under the given page key.
	edgePage(ctx context.Context, key string) (*srvpb.EdgePage, error)
}
    82  
// SplitTable implements the graph Service interface using separate static
// lookup tables for each API component.  Wrap it with NewSplitTable to obtain
// a Table.
type SplitTable struct {
	// Edges is a table of srvpb.PagedEdgeSets keyed by their source tickets.
	Edges table.Proto

	// EdgePages is a table of srvpb.EdgePages keyed by their page keys.
	EdgePages table.Proto
}
    92  
    93  func lookupPagedEdgeSets(ctx context.Context, tbl table.Proto, keys [][]byte) (<-chan edgeSetResult, error) {
    94  	ch := make(chan edgeSetResult)
    95  	go func() {
    96  		defer close(ch)
    97  		for _, key := range keys {
    98  			var pes srvpb.PagedEdgeSet
    99  			if err := tbl.Lookup(ctx, key, &pes); err == table.ErrNoSuchKey {
   100  				log.WarningContextf(ctx, "Could not locate edges with key %q", key)
   101  				ch <- edgeSetResult{Err: err}
   102  				continue
   103  			} else if err != nil {
   104  				ticket := strings.TrimPrefix(string(key), edgeSetsTablePrefix)
   105  				ch <- edgeSetResult{
   106  					Err: fmt.Errorf("edges lookup error (ticket %q): %v", ticket, err),
   107  				}
   108  				continue
   109  			}
   110  
   111  			ch <- edgeSetResult{PagedEdgeSet: &pes}
   112  		}
   113  	}()
   114  	return ch, nil
   115  }
   116  
   117  func toKeys(ss []string) [][]byte {
   118  	keys := make([][]byte, len(ss))
   119  	for i, s := range ss {
   120  		keys[i] = []byte(s)
   121  	}
   122  	return keys
   123  }
   124  
// Limits on the number of edges returned by a single edges call.
const (
	// defaultPageSize is the edge limit used when a request leaves its page
	// size at zero.
	defaultPageSize = 2048
	// maxPageSize is the edge limit substituted for any larger requested page
	// size.
	maxPageSize     = 10000
)
   129  
   130  func (s *SplitTable) pagedEdgeSets(ctx context.Context, tickets []string) (<-chan edgeSetResult, error) {
   131  	tracePrintf(ctx, "Reading PagedEdgeSets: %s", tickets)
   132  	return lookupPagedEdgeSets(ctx, s.Edges, toKeys(tickets))
   133  }
   134  func (s *SplitTable) edgePage(ctx context.Context, key string) (*srvpb.EdgePage, error) {
   135  	tracePrintf(ctx, "Reading EdgePage: %s", key)
   136  	var ep srvpb.EdgePage
   137  	return &ep, s.EdgePages.Lookup(ctx, []byte(key), &ep)
   138  }
   139  
// Table implements the GraphService interface using static lookup tables.
// It embeds the staticLookupTables that supplies its data; obtain one from
// NewSplitTable or NewCombinedTable.
type Table struct{ staticLookupTables }
   142  
   143  // Nodes implements part of the graph Service interface.
   144  func (t *Table) Nodes(ctx context.Context, req *gpb.NodesRequest) (*gpb.NodesReply, error) {
   145  	tickets, err := xrefs.FixTickets(req.Ticket)
   146  	if err != nil {
   147  		return nil, err
   148  	}
   149  
   150  	rs, err := t.pagedEdgeSets(ctx, tickets)
   151  	if err != nil {
   152  		return nil, err
   153  	}
   154  	defer func() {
   155  		// drain channel in case of errors
   156  		for range rs {
   157  		}
   158  	}()
   159  
   160  	reply := &gpb.NodesReply{Nodes: make(map[string]*cpb.NodeInfo, len(req.Ticket))}
   161  	patterns := xrefs.ConvertFilters(req.Filter)
   162  
   163  	for r := range rs {
   164  		if r.Err == table.ErrNoSuchKey {
   165  			continue
   166  		} else if r.Err != nil {
   167  			return nil, r.Err
   168  		}
   169  		node := r.PagedEdgeSet.Source
   170  		ni := &cpb.NodeInfo{Facts: make(map[string][]byte, len(node.Fact))}
   171  		for _, f := range node.Fact {
   172  			if len(patterns) == 0 || xrefs.MatchesAny(f.Name, patterns) {
   173  				ni.Facts[f.Name] = f.Value
   174  			}
   175  		}
   176  		if len(ni.Facts) > 0 {
   177  			reply.Nodes[node.Ticket] = ni
   178  		}
   179  	}
   180  	return reply, nil
   181  }
   182  
   183  // Edges implements part of the graph Service interface.
   184  func (t *Table) Edges(ctx context.Context, req *gpb.EdgesRequest) (*gpb.EdgesReply, error) {
   185  	tickets, err := xrefs.FixTickets(req.Ticket)
   186  	if err != nil {
   187  		return nil, err
   188  	}
   189  
   190  	allowedKinds := stringset.New(req.Kind...)
   191  	return t.edges(ctx, edgesRequest{
   192  		Tickets: tickets,
   193  		Filters: req.Filter,
   194  		Kinds: func(kind string) bool {
   195  			return allowedKinds.Empty() || allowedKinds.Contains(kind)
   196  		},
   197  
   198  		PageSize:  int(req.PageSize),
   199  		PageToken: req.PageToken,
   200  	})
   201  }
   202  
// edgesRequest is the internal form of an edges query handled by Table.edges.
type edgesRequest struct {
	// Tickets are the source tickets whose edges are requested.
	Tickets []string
	// Filters are fact filters; when non-empty, matching facts of the returned
	// nodes are included in the reply.
	Filters []string
	// Kinds reports whether an edge kind is wanted; a nil Kinds accepts every
	// kind.
	Kinds   func(string) bool

	// TotalOnly requests per-kind edge totals without any edges.
	TotalOnly bool
	// PageSize is the maximum number of edges to return; zero selects
	// defaultPageSize, values above maxPageSize are clamped, and negative
	// values are rejected.
	PageSize  int
	// PageToken is the NextPageToken of a previous reply, or empty to start
	// from the first edge.
	PageToken string
}
   212  
   213  func (t *Table) edges(ctx context.Context, req edgesRequest) (*gpb.EdgesReply, error) {
   214  	stats := filterStats{
   215  		max: int(req.PageSize),
   216  	}
   217  	if req.TotalOnly {
   218  		stats.max = 0
   219  	} else if stats.max < 0 {
   220  		return nil, fmt.Errorf("invalid page_size: %d", req.PageSize)
   221  	} else if stats.max == 0 {
   222  		stats.max = defaultPageSize
   223  	} else if stats.max > maxPageSize {
   224  		stats.max = maxPageSize
   225  	}
   226  
   227  	if req.PageToken != "" {
   228  		rec, err := base64.StdEncoding.DecodeString(req.PageToken)
   229  		if err != nil {
   230  			return nil, fmt.Errorf("invalid page_token: %q", req.PageToken)
   231  		}
   232  		var t ipb.PageToken
   233  		if err := proto.Unmarshal(rec, &t); err != nil || t.Index < 0 {
   234  			return nil, fmt.Errorf("invalid page_token: %q", req.PageToken)
   235  		}
   236  		stats.skip = int(t.Index)
   237  	}
   238  	pageToken := stats.skip
   239  
   240  	var nodeTickets stringset.Set
   241  
   242  	rs, err := t.pagedEdgeSets(ctx, req.Tickets)
   243  	if err != nil {
   244  		return nil, err
   245  	}
   246  	defer func() {
   247  		// drain channel in case of errors or early return
   248  		for range rs {
   249  		}
   250  	}()
   251  
   252  	patterns := xrefs.ConvertFilters(req.Filters)
   253  
   254  	reply := &gpb.EdgesReply{
   255  		EdgeSets: make(map[string]*gpb.EdgeSet),
   256  		Nodes:    make(map[string]*cpb.NodeInfo),
   257  
   258  		TotalEdgesByKind: make(map[string]int64),
   259  	}
   260  	for r := range rs {
   261  		if r.Err == table.ErrNoSuchKey {
   262  			continue
   263  		} else if r.Err != nil {
   264  			return nil, r.Err
   265  		}
   266  		pes := r.PagedEdgeSet
   267  		countEdgeKinds(pes, req.Kinds, reply.TotalEdgesByKind)
   268  
   269  		// Don't scan the EdgeSet_Groups if we're already at the specified page_size.
   270  		if stats.total == stats.max {
   271  			continue
   272  		}
   273  
   274  		groups := make(map[string]*gpb.EdgeSet_Group)
   275  		for _, grp := range pes.Group {
   276  			if req.Kinds == nil || req.Kinds(grp.Kind) {
   277  				ng, ns := stats.filter(grp)
   278  				if ng != nil {
   279  					for _, n := range ns {
   280  						if len(patterns) > 0 && !nodeTickets.Contains(n.Ticket) {
   281  							nodeTickets.Add(n.Ticket)
   282  							if info := nodeToInfo(patterns, n); info != nil {
   283  								reply.Nodes[n.Ticket] = info
   284  							}
   285  						}
   286  					}
   287  					groups[grp.Kind] = ng
   288  					if stats.total == stats.max {
   289  						break
   290  					}
   291  				}
   292  			}
   293  		}
   294  
   295  		// TODO(schroederc): ensure that pes.EdgeSet.Groups and pes.PageIndexes of
   296  		// the same kind are grouped together in the EdgesReply
   297  
   298  		if stats.total != stats.max {
   299  			for _, idx := range pes.PageIndex {
   300  				if req.Kinds == nil || req.Kinds(idx.EdgeKind) {
   301  					if stats.skipPage(idx) {
   302  						log.WarningContextf(ctx, "Skipping EdgePage: %s", idx.PageKey)
   303  						continue
   304  					}
   305  
   306  					log.InfoContextf(ctx, "Retrieving EdgePage: %s", idx.PageKey)
   307  					ep, err := t.edgePage(ctx, idx.PageKey)
   308  					if err == table.ErrNoSuchKey {
   309  						return nil, fmt.Errorf("internal error: missing edge page: %q", idx.PageKey)
   310  					} else if err != nil {
   311  						return nil, fmt.Errorf("edge page lookup error (page key: %q): %v", idx.PageKey, err)
   312  					}
   313  
   314  					ng, ns := stats.filter(ep.EdgesGroup)
   315  					if ng != nil {
   316  						for _, n := range ns {
   317  							if len(patterns) > 0 && !nodeTickets.Contains(n.Ticket) {
   318  								nodeTickets.Add(n.Ticket)
   319  								if info := nodeToInfo(patterns, n); info != nil {
   320  									reply.Nodes[n.Ticket] = info
   321  								}
   322  							}
   323  						}
   324  						groups[ep.EdgesGroup.Kind] = ng
   325  						if stats.total == stats.max {
   326  							break
   327  						}
   328  					}
   329  				}
   330  			}
   331  		}
   332  
   333  		if len(groups) > 0 {
   334  			reply.EdgeSets[pes.Source.Ticket] = &gpb.EdgeSet{Groups: groups}
   335  
   336  			if len(patterns) > 0 && !nodeTickets.Contains(pes.Source.Ticket) {
   337  				nodeTickets.Add(pes.Source.Ticket)
   338  				if info := nodeToInfo(patterns, pes.Source); info != nil {
   339  					reply.Nodes[pes.Source.Ticket] = info
   340  				}
   341  			}
   342  		}
   343  	}
   344  	totalEdgesPossible := int(sumEdgeKinds(reply.TotalEdgesByKind))
   345  	if stats.total > stats.max {
   346  		panic(fmt.Sprintf("totalEdges greater than maxEdges: %d > %d", stats.total, stats.max))
   347  	} else if pageToken+stats.total > totalEdgesPossible && pageToken <= totalEdgesPossible {
   348  		panic(fmt.Sprintf("pageToken+totalEdges greater than totalEdgesPossible: %d+%d > %d", pageToken, stats.total, totalEdgesPossible))
   349  	}
   350  
   351  	if pageToken+stats.total != totalEdgesPossible && stats.total != 0 {
   352  		rec, err := proto.Marshal(&ipb.PageToken{Index: int32(pageToken + stats.total)})
   353  		if err != nil {
   354  			return nil, fmt.Errorf("internal error: error marshalling page token: %v", err)
   355  		}
   356  		reply.NextPageToken = base64.StdEncoding.EncodeToString(rec)
   357  	}
   358  
   359  	return reply, nil
   360  }
   361  
   362  func countEdgeKinds(pes *srvpb.PagedEdgeSet, kindFilter func(string) bool, totals map[string]int64) {
   363  	for _, grp := range pes.Group {
   364  		if kindFilter == nil || kindFilter(grp.Kind) {
   365  			totals[grp.Kind] += int64(len(grp.Edge))
   366  		}
   367  	}
   368  	for _, page := range pes.PageIndex {
   369  		if kindFilter == nil || kindFilter(page.EdgeKind) {
   370  			totals[page.EdgeKind] += int64(page.EdgeCount)
   371  		}
   372  	}
   373  }
   374  
   375  func sumEdgeKinds(totals map[string]int64) int64 {
   376  	var sum int64
   377  	for _, cnt := range totals {
   378  		sum += cnt
   379  	}
   380  	return sum
   381  }
   382  
// filterStats tracks paging progress while edges are selected for a reply.
type filterStats struct {
	// skip is the number of edges still to be passed over before any edge is
	// kept, total is the number of edges kept so far, and max is the limit on
	// total.
	skip, total, max int
}
   386  
   387  func (s *filterStats) skipPage(idx *srvpb.PageIndex) bool {
   388  	if int(idx.EdgeCount) <= s.skip {
   389  		s.skip -= int(idx.EdgeCount)
   390  		return true
   391  	}
   392  	return s.total >= s.max
   393  }
   394  
   395  func (s *filterStats) filter(g *srvpb.EdgeGroup) (*gpb.EdgeSet_Group, []*srvpb.Node) {
   396  	edges := g.Edge
   397  	if len(edges) <= s.skip {
   398  		s.skip -= len(edges)
   399  		return nil, nil
   400  	} else if s.skip > 0 {
   401  		edges = edges[s.skip:]
   402  		s.skip = 0
   403  	}
   404  
   405  	if len(edges) > s.max-s.total {
   406  		edges = edges[:(s.max - s.total)]
   407  	}
   408  
   409  	s.total += len(edges)
   410  
   411  	targets := make([]*srvpb.Node, len(edges))
   412  	for i, e := range edges {
   413  		targets[i] = e.Target
   414  	}
   415  
   416  	return &gpb.EdgeSet_Group{
   417  		Edge: e2e(edges),
   418  	}, targets
   419  }
   420  
   421  func e2e(es []*srvpb.EdgeGroup_Edge) []*gpb.EdgeSet_Group_Edge {
   422  	edges := make([]*gpb.EdgeSet_Group_Edge, len(es))
   423  	for i, e := range es {
   424  		edges[i] = &gpb.EdgeSet_Group_Edge{
   425  			TargetTicket: e.Target.Ticket,
   426  			Ordinal:      e.Ordinal,
   427  		}
   428  	}
   429  	return edges
   430  }
   431  
   432  // NewSplitTable returns a table based on the given serving tables for each API
   433  // component.
   434  func NewSplitTable(c *SplitTable) *Table { return &Table{c} }
   435  
   436  // NewCombinedTable returns a table for the given combined graph lookup table.
   437  // The table's keys are expected to be constructed using only the EdgeSetKey,
   438  // EdgePageKey, and DecorationsKey functions.
   439  func NewCombinedTable(t table.Proto) *Table { return &Table{&combinedTable{t}} }
   440  
   441  // EdgeSetKey returns the edgeset CombinedTable key for the given source ticket.
   442  func EdgeSetKey(ticket string) []byte {
   443  	return []byte(edgeSetsTablePrefix + ticket)
   444  }
   445  
   446  // EdgePageKey returns the edgepage CombinedTable key for the given key.
   447  func EdgePageKey(key string) []byte {
   448  	return []byte(edgePagesTablePrefix + key)
   449  }
   450  
// combinedTable is the staticLookupTables implementation backed by a single
// table.Proto in which edge sets and edge pages are distinguished by the key
// prefixes applied by EdgeSetKey and EdgePageKey.
type combinedTable struct{ table.Proto }
   452  
   453  func (c *combinedTable) pagedEdgeSets(ctx context.Context, tickets []string) (<-chan edgeSetResult, error) {
   454  	keys := make([][]byte, len(tickets))
   455  	for i, ticket := range tickets {
   456  		keys[i] = EdgeSetKey(ticket)
   457  	}
   458  	return lookupPagedEdgeSets(ctx, c, keys)
   459  }
   460  func (c *combinedTable) edgePage(ctx context.Context, key string) (*srvpb.EdgePage, error) {
   461  	var ep srvpb.EdgePage
   462  	return &ep, c.Lookup(ctx, EdgePageKey(key), &ep)
   463  }