kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/serving/graph/graph.go (about) 1 /* 2 * Copyright 2017 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Package graph provides a high-performance table-based implementation of the 18 // graph.Service. 19 // 20 // Table format: 21 // 22 // edgeSets:<ticket> -> srvpb.PagedEdgeSet 23 // edgePages:<page_key> -> srvpb.EdgePage 24 package graph // import "kythe.io/kythe/go/serving/graph" 25 26 import ( 27 "context" 28 "encoding/base64" 29 "fmt" 30 "regexp" 31 "strings" 32 33 "kythe.io/kythe/go/services/xrefs" 34 "kythe.io/kythe/go/storage/table" 35 "kythe.io/kythe/go/util/log" 36 37 "bitbucket.org/creachadair/stringset" 38 "golang.org/x/net/trace" 39 "google.golang.org/protobuf/proto" 40 41 cpb "kythe.io/kythe/proto/common_go_proto" 42 gpb "kythe.io/kythe/proto/graph_go_proto" 43 ipb "kythe.io/kythe/proto/internal_go_proto" 44 srvpb "kythe.io/kythe/proto/serving_go_proto" 45 ) 46 47 func tracePrintf(ctx context.Context, msg string, args ...any) { 48 if t, ok := trace.FromContext(ctx); ok { 49 t.LazyPrintf(msg, args...) 50 } 51 } 52 53 func nodeToInfo(patterns []*regexp.Regexp, n *srvpb.Node) *cpb.NodeInfo { 54 ni := &cpb.NodeInfo{Facts: make(map[string][]byte, len(n.Fact))} 55 for _, f := range n.Fact { 56 if xrefs.MatchesAny(f.Name, patterns) { 57 ni.Facts[f.Name] = f.Value 58 } 59 } 60 if len(ni.Facts) == 0 { 61 return nil 62 } 63 return ni 64 } 65 66 // Key prefixes for the combinedTable implementation. 67 const ( 68 edgeSetsTablePrefix = "edgeSets:" 69 edgePagesTablePrefix = "edgePages:" 70 ) 71 72 type edgeSetResult struct { 73 PagedEdgeSet *srvpb.PagedEdgeSet 74 75 Err error 76 } 77 78 type staticLookupTables interface { 79 pagedEdgeSets(ctx context.Context, tickets []string) (<-chan edgeSetResult, error) 80 edgePage(ctx context.Context, key string) (*srvpb.EdgePage, error) 81 } 82 83 // SplitTable implements the graph Service interface using separate static 84 // lookup tables for each API component. 85 type SplitTable struct { 86 // Edges is a table of srvpb.PagedEdgeSets keyed by their source tickets. 87 Edges table.Proto 88 89 // EdgePages is a table of srvpb.EdgePages keyed by their page keys. 90 EdgePages table.Proto 91 } 92 93 func lookupPagedEdgeSets(ctx context.Context, tbl table.Proto, keys [][]byte) (<-chan edgeSetResult, error) { 94 ch := make(chan edgeSetResult) 95 go func() { 96 defer close(ch) 97 for _, key := range keys { 98 var pes srvpb.PagedEdgeSet 99 if err := tbl.Lookup(ctx, key, &pes); err == table.ErrNoSuchKey { 100 log.WarningContextf(ctx, "Could not locate edges with key %q", key) 101 ch <- edgeSetResult{Err: err} 102 continue 103 } else if err != nil { 104 ticket := strings.TrimPrefix(string(key), edgeSetsTablePrefix) 105 ch <- edgeSetResult{ 106 Err: fmt.Errorf("edges lookup error (ticket %q): %v", ticket, err), 107 } 108 continue 109 } 110 111 ch <- edgeSetResult{PagedEdgeSet: &pes} 112 } 113 }() 114 return ch, nil 115 } 116 117 func toKeys(ss []string) [][]byte { 118 keys := make([][]byte, len(ss)) 119 for i, s := range ss { 120 keys[i] = []byte(s) 121 } 122 return keys 123 } 124 125 const ( 126 defaultPageSize = 2048 127 maxPageSize = 10000 128 ) 129 130 func (s *SplitTable) pagedEdgeSets(ctx context.Context, tickets []string) (<-chan edgeSetResult, error) { 131 tracePrintf(ctx, "Reading PagedEdgeSets: %s", tickets) 132 return lookupPagedEdgeSets(ctx, s.Edges, toKeys(tickets)) 133 } 134 func (s *SplitTable) edgePage(ctx context.Context, key string) (*srvpb.EdgePage, error) { 135 tracePrintf(ctx, "Reading EdgePage: %s", key) 136 var ep srvpb.EdgePage 137 return &ep, s.EdgePages.Lookup(ctx, []byte(key), &ep) 138 } 139 140 // Table implements the GraphService interface using static lookup tables. 141 type Table struct{ staticLookupTables } 142 143 // Nodes implements part of the graph Service interface. 144 func (t *Table) Nodes(ctx context.Context, req *gpb.NodesRequest) (*gpb.NodesReply, error) { 145 tickets, err := xrefs.FixTickets(req.Ticket) 146 if err != nil { 147 return nil, err 148 } 149 150 rs, err := t.pagedEdgeSets(ctx, tickets) 151 if err != nil { 152 return nil, err 153 } 154 defer func() { 155 // drain channel in case of errors 156 for range rs { 157 } 158 }() 159 160 reply := &gpb.NodesReply{Nodes: make(map[string]*cpb.NodeInfo, len(req.Ticket))} 161 patterns := xrefs.ConvertFilters(req.Filter) 162 163 for r := range rs { 164 if r.Err == table.ErrNoSuchKey { 165 continue 166 } else if r.Err != nil { 167 return nil, r.Err 168 } 169 node := r.PagedEdgeSet.Source 170 ni := &cpb.NodeInfo{Facts: make(map[string][]byte, len(node.Fact))} 171 for _, f := range node.Fact { 172 if len(patterns) == 0 || xrefs.MatchesAny(f.Name, patterns) { 173 ni.Facts[f.Name] = f.Value 174 } 175 } 176 if len(ni.Facts) > 0 { 177 reply.Nodes[node.Ticket] = ni 178 } 179 } 180 return reply, nil 181 } 182 183 // Edges implements part of the graph Service interface. 184 func (t *Table) Edges(ctx context.Context, req *gpb.EdgesRequest) (*gpb.EdgesReply, error) { 185 tickets, err := xrefs.FixTickets(req.Ticket) 186 if err != nil { 187 return nil, err 188 } 189 190 allowedKinds := stringset.New(req.Kind...) 191 return t.edges(ctx, edgesRequest{ 192 Tickets: tickets, 193 Filters: req.Filter, 194 Kinds: func(kind string) bool { 195 return allowedKinds.Empty() || allowedKinds.Contains(kind) 196 }, 197 198 PageSize: int(req.PageSize), 199 PageToken: req.PageToken, 200 }) 201 } 202 203 type edgesRequest struct { 204 Tickets []string 205 Filters []string 206 Kinds func(string) bool 207 208 TotalOnly bool 209 PageSize int 210 PageToken string 211 } 212 213 func (t *Table) edges(ctx context.Context, req edgesRequest) (*gpb.EdgesReply, error) { 214 stats := filterStats{ 215 max: int(req.PageSize), 216 } 217 if req.TotalOnly { 218 stats.max = 0 219 } else if stats.max < 0 { 220 return nil, fmt.Errorf("invalid page_size: %d", req.PageSize) 221 } else if stats.max == 0 { 222 stats.max = defaultPageSize 223 } else if stats.max > maxPageSize { 224 stats.max = maxPageSize 225 } 226 227 if req.PageToken != "" { 228 rec, err := base64.StdEncoding.DecodeString(req.PageToken) 229 if err != nil { 230 return nil, fmt.Errorf("invalid page_token: %q", req.PageToken) 231 } 232 var t ipb.PageToken 233 if err := proto.Unmarshal(rec, &t); err != nil || t.Index < 0 { 234 return nil, fmt.Errorf("invalid page_token: %q", req.PageToken) 235 } 236 stats.skip = int(t.Index) 237 } 238 pageToken := stats.skip 239 240 var nodeTickets stringset.Set 241 242 rs, err := t.pagedEdgeSets(ctx, req.Tickets) 243 if err != nil { 244 return nil, err 245 } 246 defer func() { 247 // drain channel in case of errors or early return 248 for range rs { 249 } 250 }() 251 252 patterns := xrefs.ConvertFilters(req.Filters) 253 254 reply := &gpb.EdgesReply{ 255 EdgeSets: make(map[string]*gpb.EdgeSet), 256 Nodes: make(map[string]*cpb.NodeInfo), 257 258 TotalEdgesByKind: make(map[string]int64), 259 } 260 for r := range rs { 261 if r.Err == table.ErrNoSuchKey { 262 continue 263 } else if r.Err != nil { 264 return nil, r.Err 265 } 266 pes := r.PagedEdgeSet 267 countEdgeKinds(pes, req.Kinds, reply.TotalEdgesByKind) 268 269 // Don't scan the EdgeSet_Groups if we're already at the specified page_size. 270 if stats.total == stats.max { 271 continue 272 } 273 274 groups := make(map[string]*gpb.EdgeSet_Group) 275 for _, grp := range pes.Group { 276 if req.Kinds == nil || req.Kinds(grp.Kind) { 277 ng, ns := stats.filter(grp) 278 if ng != nil { 279 for _, n := range ns { 280 if len(patterns) > 0 && !nodeTickets.Contains(n.Ticket) { 281 nodeTickets.Add(n.Ticket) 282 if info := nodeToInfo(patterns, n); info != nil { 283 reply.Nodes[n.Ticket] = info 284 } 285 } 286 } 287 groups[grp.Kind] = ng 288 if stats.total == stats.max { 289 break 290 } 291 } 292 } 293 } 294 295 // TODO(schroederc): ensure that pes.EdgeSet.Groups and pes.PageIndexes of 296 // the same kind are grouped together in the EdgesReply 297 298 if stats.total != stats.max { 299 for _, idx := range pes.PageIndex { 300 if req.Kinds == nil || req.Kinds(idx.EdgeKind) { 301 if stats.skipPage(idx) { 302 log.WarningContextf(ctx, "Skipping EdgePage: %s", idx.PageKey) 303 continue 304 } 305 306 log.InfoContextf(ctx, "Retrieving EdgePage: %s", idx.PageKey) 307 ep, err := t.edgePage(ctx, idx.PageKey) 308 if err == table.ErrNoSuchKey { 309 return nil, fmt.Errorf("internal error: missing edge page: %q", idx.PageKey) 310 } else if err != nil { 311 return nil, fmt.Errorf("edge page lookup error (page key: %q): %v", idx.PageKey, err) 312 } 313 314 ng, ns := stats.filter(ep.EdgesGroup) 315 if ng != nil { 316 for _, n := range ns { 317 if len(patterns) > 0 && !nodeTickets.Contains(n.Ticket) { 318 nodeTickets.Add(n.Ticket) 319 if info := nodeToInfo(patterns, n); info != nil { 320 reply.Nodes[n.Ticket] = info 321 } 322 } 323 } 324 groups[ep.EdgesGroup.Kind] = ng 325 if stats.total == stats.max { 326 break 327 } 328 } 329 } 330 } 331 } 332 333 if len(groups) > 0 { 334 reply.EdgeSets[pes.Source.Ticket] = &gpb.EdgeSet{Groups: groups} 335 336 if len(patterns) > 0 && !nodeTickets.Contains(pes.Source.Ticket) { 337 nodeTickets.Add(pes.Source.Ticket) 338 if info := nodeToInfo(patterns, pes.Source); info != nil { 339 reply.Nodes[pes.Source.Ticket] = info 340 } 341 } 342 } 343 } 344 totalEdgesPossible := int(sumEdgeKinds(reply.TotalEdgesByKind)) 345 if stats.total > stats.max { 346 panic(fmt.Sprintf("totalEdges greater than maxEdges: %d > %d", stats.total, stats.max)) 347 } else if pageToken+stats.total > totalEdgesPossible && pageToken <= totalEdgesPossible { 348 panic(fmt.Sprintf("pageToken+totalEdges greater than totalEdgesPossible: %d+%d > %d", pageToken, stats.total, totalEdgesPossible)) 349 } 350 351 if pageToken+stats.total != totalEdgesPossible && stats.total != 0 { 352 rec, err := proto.Marshal(&ipb.PageToken{Index: int32(pageToken + stats.total)}) 353 if err != nil { 354 return nil, fmt.Errorf("internal error: error marshalling page token: %v", err) 355 } 356 reply.NextPageToken = base64.StdEncoding.EncodeToString(rec) 357 } 358 359 return reply, nil 360 } 361 362 func countEdgeKinds(pes *srvpb.PagedEdgeSet, kindFilter func(string) bool, totals map[string]int64) { 363 for _, grp := range pes.Group { 364 if kindFilter == nil || kindFilter(grp.Kind) { 365 totals[grp.Kind] += int64(len(grp.Edge)) 366 } 367 } 368 for _, page := range pes.PageIndex { 369 if kindFilter == nil || kindFilter(page.EdgeKind) { 370 totals[page.EdgeKind] += int64(page.EdgeCount) 371 } 372 } 373 } 374 375 func sumEdgeKinds(totals map[string]int64) int64 { 376 var sum int64 377 for _, cnt := range totals { 378 sum += cnt 379 } 380 return sum 381 } 382 383 type filterStats struct { 384 skip, total, max int 385 } 386 387 func (s *filterStats) skipPage(idx *srvpb.PageIndex) bool { 388 if int(idx.EdgeCount) <= s.skip { 389 s.skip -= int(idx.EdgeCount) 390 return true 391 } 392 return s.total >= s.max 393 } 394 395 func (s *filterStats) filter(g *srvpb.EdgeGroup) (*gpb.EdgeSet_Group, []*srvpb.Node) { 396 edges := g.Edge 397 if len(edges) <= s.skip { 398 s.skip -= len(edges) 399 return nil, nil 400 } else if s.skip > 0 { 401 edges = edges[s.skip:] 402 s.skip = 0 403 } 404 405 if len(edges) > s.max-s.total { 406 edges = edges[:(s.max - s.total)] 407 } 408 409 s.total += len(edges) 410 411 targets := make([]*srvpb.Node, len(edges)) 412 for i, e := range edges { 413 targets[i] = e.Target 414 } 415 416 return &gpb.EdgeSet_Group{ 417 Edge: e2e(edges), 418 }, targets 419 } 420 421 func e2e(es []*srvpb.EdgeGroup_Edge) []*gpb.EdgeSet_Group_Edge { 422 edges := make([]*gpb.EdgeSet_Group_Edge, len(es)) 423 for i, e := range es { 424 edges[i] = &gpb.EdgeSet_Group_Edge{ 425 TargetTicket: e.Target.Ticket, 426 Ordinal: e.Ordinal, 427 } 428 } 429 return edges 430 } 431 432 // NewSplitTable returns a table based on the given serving tables for each API 433 // component. 434 func NewSplitTable(c *SplitTable) *Table { return &Table{c} } 435 436 // NewCombinedTable returns a table for the given combined graph lookup table. 437 // The table's keys are expected to be constructed using only the EdgeSetKey, 438 // EdgePageKey, and DecorationsKey functions. 439 func NewCombinedTable(t table.Proto) *Table { return &Table{&combinedTable{t}} } 440 441 // EdgeSetKey returns the edgeset CombinedTable key for the given source ticket. 442 func EdgeSetKey(ticket string) []byte { 443 return []byte(edgeSetsTablePrefix + ticket) 444 } 445 446 // EdgePageKey returns the edgepage CombinedTable key for the given key. 447 func EdgePageKey(key string) []byte { 448 return []byte(edgePagesTablePrefix + key) 449 } 450 451 type combinedTable struct{ table.Proto } 452 453 func (c *combinedTable) pagedEdgeSets(ctx context.Context, tickets []string) (<-chan edgeSetResult, error) { 454 keys := make([][]byte, len(tickets)) 455 for i, ticket := range tickets { 456 keys[i] = EdgeSetKey(ticket) 457 } 458 return lookupPagedEdgeSets(ctx, c, keys) 459 } 460 func (c *combinedTable) edgePage(ctx context.Context, key string) (*srvpb.EdgePage, error) { 461 var ep srvpb.EdgePage 462 return &ep, c.Lookup(ctx, EdgePageKey(key), &ep) 463 }