kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/serving/pipeline/beam.go (about) 1 /* 2 * Copyright 2018 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package pipeline 18 19 import ( 20 "fmt" 21 "reflect" 22 "sort" 23 "strconv" 24 25 "kythe.io/kythe/go/serving/pipeline/nodes" 26 "kythe.io/kythe/go/serving/xrefs/assemble" 27 "kythe.io/kythe/go/util/compare" 28 "kythe.io/kythe/go/util/kytheuri" 29 "kythe.io/kythe/go/util/log" 30 "kythe.io/kythe/go/util/schema" 31 "kythe.io/kythe/go/util/schema/edges" 32 "kythe.io/kythe/go/util/schema/facts" 33 kinds "kythe.io/kythe/go/util/schema/nodes" 34 "kythe.io/kythe/go/util/span" 35 36 "github.com/apache/beam/sdks/go/pkg/beam" 37 "github.com/apache/beam/sdks/go/pkg/beam/transforms/filter" 38 "google.golang.org/protobuf/proto" 39 40 cpb "kythe.io/kythe/proto/common_go_proto" 41 gspb "kythe.io/kythe/proto/graph_serving_go_proto" 42 ppb "kythe.io/kythe/proto/pipeline_go_proto" 43 scpb "kythe.io/kythe/proto/schema_go_proto" 44 srvpb "kythe.io/kythe/proto/serving_go_proto" 45 spb "kythe.io/kythe/proto/storage_go_proto" 46 xspb "kythe.io/kythe/proto/xref_serving_go_proto" 47 ) 48 49 func init() { 50 beam.RegisterFunction(bareRevEdge) 51 beam.RegisterFunction(callEdge) 52 beam.RegisterFunction(combineEdgesIndex) 53 beam.RegisterFunction(completeDocument) 54 beam.RegisterFunction(constructCaller) 55 beam.RegisterFunction(defToDecorPiece) 56 beam.RegisterFunction(diagToDecor) 57 beam.RegisterFunction(edgeTargets) 58 beam.RegisterFunction(edgeToCrossRefRelation) 59 beam.RegisterFunction(emitRelatedDefs) 60 beam.RegisterFunction(fileToDecorPiece) 61 beam.RegisterFunction(fileToTags) 62 beam.RegisterFunction(filterAnchorNodes) 63 beam.RegisterFunction(groupCrossRefs) 64 beam.RegisterFunction(groupEdges) 65 beam.RegisterFunction(keyByPath) 66 beam.RegisterFunction(keyCrossRef) 67 beam.RegisterFunction(keyNode) 68 beam.RegisterFunction(keyRef) 69 beam.RegisterFunction(moveSourceToKey) 70 beam.RegisterFunction(nodeToChildren) 71 beam.RegisterFunction(nodeToDecorPiece) 72 beam.RegisterFunction(nodeToDiagnostic) 73 beam.RegisterFunction(nodeToDocs) 74 beam.RegisterFunction(nodeToEdges) 75 beam.RegisterFunction(nodeToReverseEdges) 76 beam.RegisterFunction(overriddenToDecor) 77 beam.RegisterFunction(overridingToFile) 78 beam.RegisterFunction(parseMarkedSource) 79 beam.RegisterFunction(refToCallsite) 80 beam.RegisterFunction(refToCrossRef) 81 beam.RegisterFunction(refToDecorPiece) 82 beam.RegisterFunction(refToTag) 83 beam.RegisterFunction(reverseEdge) 84 beam.RegisterFunction(splitEdge) 85 beam.RegisterFunction(targetToFile) 86 beam.RegisterFunction(toDefinition) 87 beam.RegisterFunction(toFiles) 88 beam.RegisterFunction(toRefs) 89 90 beam.RegisterType(reflect.TypeOf((*combineDecorPieces)(nil)).Elem()) 91 beam.RegisterType(reflect.TypeOf((*ticketKey)(nil)).Elem()) 92 93 beam.RegisterType(reflect.TypeOf((*cpb.Diagnostic)(nil)).Elem()) 94 beam.RegisterType(reflect.TypeOf((*cpb.MarkedSource)(nil)).Elem()) 95 beam.RegisterType(reflect.TypeOf((*ppb.DecorationPiece)(nil)).Elem()) 96 beam.RegisterType(reflect.TypeOf((*ppb.Reference)(nil)).Elem()) 97 beam.RegisterType(reflect.TypeOf((*scpb.Edge)(nil)).Elem()) 98 beam.RegisterType(reflect.TypeOf((*scpb.Node)(nil)).Elem()) 99 beam.RegisterType(reflect.TypeOf((*spb.Entry)(nil)).Elem()) 100 beam.RegisterType(reflect.TypeOf((*spb.VName)(nil)).Elem()) 101 beam.RegisterType(reflect.TypeOf((*srvpb.CorpusRoots)(nil)).Elem()) 102 beam.RegisterType(reflect.TypeOf((*srvpb.Document)(nil)).Elem()) 103 beam.RegisterType(reflect.TypeOf((*srvpb.EdgePage)(nil)).Elem()) 104 beam.RegisterType(reflect.TypeOf((*srvpb.ExpandedAnchor)(nil)).Elem()) 105 beam.RegisterType(reflect.TypeOf((*srvpb.File)(nil)).Elem()) 106 beam.RegisterType(reflect.TypeOf((*srvpb.FileDecorations)(nil)).Elem()) 107 beam.RegisterType(reflect.TypeOf((*srvpb.FileDirectory)(nil)).Elem()) 108 beam.RegisterType(reflect.TypeOf((*srvpb.PagedCrossReferences)(nil)).Elem()) 109 beam.RegisterType(reflect.TypeOf((*srvpb.PagedCrossReferences_Page)(nil)).Elem()) 110 beam.RegisterType(reflect.TypeOf((*srvpb.PagedEdgeSet)(nil)).Elem()) 111 } 112 113 // KytheBeam controls the lifetime and generation of PCollections in the Kythe 114 // pipeline. 115 type KytheBeam struct { 116 s beam.Scope 117 118 fileVNames beam.PCollection // *spb.VName 119 nodes beam.PCollection // *scpb.Node 120 files beam.PCollection // *srvpb.File 121 refs beam.PCollection // *ppb.Reference 122 edges beam.PCollection // *gspb.Edges 123 124 markedSources beam.PCollection // KV<*spb.VName, *cpb.MarkedSource> 125 126 anchorBuildConfigs beam.PCollection // KV<*spb.VName, string> 127 } 128 129 // FromNodes creates a KytheBeam pipeline from an input collection of 130 // *spb.Nodes. 131 func FromNodes(s beam.Scope, nodes beam.PCollection) *KytheBeam { 132 return &KytheBeam{s: s, nodes: nodes} 133 } 134 135 // FromEntries creates a KytheBeam pipeline from an input collection of 136 // *spb.Entry messages. 137 func FromEntries(s beam.Scope, entries beam.PCollection) *KytheBeam { 138 return FromNodes(s, nodes.FromEntries(s, entries)) 139 } 140 141 func keyNode(n *scpb.Node) (*spb.VName, *scpb.Node) { return n.Source, n } 142 143 // SplitCrossReferences returns a columnar Kythe cross-references table derived 144 // from the Kythe input graph. The beam.PCollection has elements of type 145 // KV<[]byte, []byte>. 146 func (k *KytheBeam) SplitCrossReferences() beam.PCollection { 147 s := k.s.Scope("SplitCrossReferences") 148 149 refs := beam.ParDo(s, refToCrossRef, k.References()) 150 idx := beam.ParDo(s, nodeToCrossRef, beam.CoGroupByKey(s, 151 beam.ParDo(s, keyNode, k.Nodes()), 152 k.getMarkedSources(), 153 // TODO(schroederc): merge_with 154 )) 155 156 callgraph := k.callGraph() 157 158 edges := k.edgeRelations() 159 relatedDefs := beam.ParDo(s, emitRelatedDefs, beam.CoGroupByKey(s, 160 k.directDefinitions(), 161 beam.ParDo(s, splitEdge, filter.Distinct(s, beam.ParDo(s, bareRevEdge, edges))), 162 )) 163 relations := beam.ParDo(s, edgeToCrossRefRelation, edges) 164 165 return beam.ParDo(s, encodeCrossRef, beam.Flatten(s, 166 idx, 167 refs, 168 relations, 169 relatedDefs, 170 callgraph, 171 )) 172 } 173 174 func (k *KytheBeam) callGraph() beam.PCollection { 175 s := k.s.Scope("CallGraph") 176 callsites := beam.ParDo(s, refToCallsite, k.References()) 177 // TODO(schroederc): override callers 178 callers := beam.ParDo(s, constructCaller, beam.CoGroupByKey(s, 179 k.directDefinitions(), 180 k.getMarkedSources(), 181 beam.ParDo(s, splitEdge, filter.Distinct(s, beam.ParDo(s, callEdge, callsites))), 182 )) 183 return beam.Flatten(s, callsites, callers) 184 } 185 186 func emitRelatedDefs(target *spb.VName, defStream func(**srvpb.ExpandedAnchor) bool, srcStream func(**spb.VName) bool, emit func(*xspb.CrossReferences)) { 187 var def *srvpb.ExpandedAnchor 188 if !defStream(&def) { 189 return // no related node definition found 190 } 191 nodeDef := &xspb.CrossReferences_NodeDefinition_{&xspb.CrossReferences_NodeDefinition{ 192 Node: target, 193 Location: def, 194 }} 195 196 var src *spb.VName 197 for srcStream(&src) { 198 emit(&xspb.CrossReferences{Source: src, Entry: nodeDef}) 199 } 200 } 201 202 func bareRevEdge(eg *gspb.Edges, emit func(*scpb.Edge)) error { 203 switch e := eg.Entry.(type) { 204 case *gspb.Edges_Edge_: 205 edge := e.Edge 206 emit(&scpb.Edge{Target: eg.Source, Source: edge.Target}) 207 } 208 return nil 209 } 210 211 func constructCaller(caller *spb.VName, defStream func(**srvpb.ExpandedAnchor) bool, msStream func(**cpb.MarkedSource) bool, calleeStream func(**spb.VName) bool, emit func(*xspb.CrossReferences)) { 212 var def *srvpb.ExpandedAnchor 213 if !defStream(&def) { 214 return // no caller definition found 215 } 216 var ms *cpb.MarkedSource 217 for msStream(&ms) { 218 break 219 } 220 221 var callee *spb.VName 222 for calleeStream(&callee) { 223 emit(&xspb.CrossReferences{ 224 Source: callee, 225 Entry: &xspb.CrossReferences_Caller_{&xspb.CrossReferences_Caller{ 226 Caller: caller, 227 Location: def, 228 MarkedSource: ms, 229 }}, 230 }) 231 } 232 } 233 234 func refToCallsite(r *ppb.Reference, emit func(*xspb.CrossReferences)) { 235 if r.GetKytheKind() != scpb.EdgeKind_REF_CALL || r.Scope == nil { 236 return 237 } 238 emit(&xspb.CrossReferences{ 239 Source: r.Source, 240 Entry: &xspb.CrossReferences_Callsite_{&xspb.CrossReferences_Callsite{ 241 Kind: xspb.CrossReferences_Callsite_DIRECT, 242 Caller: r.Scope, 243 Location: r.Anchor, 244 }}, 245 }) 246 } 247 248 func callEdge(x *xspb.CrossReferences) *scpb.Edge { 249 return &scpb.Edge{Source: x.GetCallsite().GetCaller(), Target: x.GetSource()} 250 } 251 252 func edgeToCrossRefRelation(eg *gspb.Edges, emit func(*xspb.CrossReferences)) error { 253 switch e := eg.Entry.(type) { 254 case *gspb.Edges_Edge_: 255 edge := e.Edge 256 r := &xspb.CrossReferences_Relation{ 257 Ordinal: edge.Ordinal, 258 Reverse: edge.Reverse, 259 Node: edge.Target, 260 } 261 if k := edge.GetGenericKind(); k != "" { 262 r.Kind = &xspb.CrossReferences_Relation_GenericKind{k} 263 } else { 264 r.Kind = &xspb.CrossReferences_Relation_KytheKind{edge.GetKytheKind()} 265 } 266 emit(&xspb.CrossReferences{ 267 Source: eg.Source, 268 Entry: &xspb.CrossReferences_Relation_{r}, 269 }) 270 return nil 271 case *gspb.Edges_Target_: 272 target := e.Target 273 emit(&xspb.CrossReferences{ 274 Source: eg.Source, 275 Entry: &xspb.CrossReferences_RelatedNode_{&xspb.CrossReferences_RelatedNode{ 276 Node: target.Node, 277 }}, 278 }) 279 return nil 280 default: 281 return fmt.Errorf("unexpected Edges entry: %T", e) 282 } 283 } 284 285 // CrossReferences returns a Kythe file decorations table derived from the Kythe 286 // input graph. The beam.PCollections have elements of type 287 // KV<string, *srvpb.PagedCrossReferences> and 288 // KV<string, *srvpb.PagedCrossReferences_Page>, respectively. 289 func (k *KytheBeam) CrossReferences() (sets, pages beam.PCollection) { 290 s := k.s.Scope("CrossReferences") 291 refs := beam.CoGroupByKey(s, 292 beam.ParDo(s, keyRef, k.References()), 293 beam.ParDo(s, keyCrossRef, k.callGraph()), 294 ) 295 // TODO(schroederc): related nodes 296 // TODO(schroederc): MarkedSource 297 // TODO(schroederc): source_node 298 return beam.ParDo2(s, groupCrossRefs, refs) 299 } 300 301 var callerKinds = map[xspb.CrossReferences_Callsite_Kind]string{ 302 xspb.CrossReferences_Callsite_DIRECT: "#internal/ref/call/direct", 303 xspb.CrossReferences_Callsite_OVERRIDE: "#internal/ref/call/override", 304 } 305 306 // groupCrossRefs emits *srvpb.PagedCrossReferences and *srvpb.PagedCrossReferences_Pages for a 307 // single node's collection of *ppb.References and callsites. 308 func groupCrossRefs( 309 key *spb.VName, 310 refStream func(**ppb.Reference) bool, 311 callStream func(**xspb.CrossReferences) bool, 312 emitSet func(string, *srvpb.PagedCrossReferences), 313 emitPage func(string, *srvpb.PagedCrossReferences_Page)) { 314 set := &srvpb.PagedCrossReferences{SourceTicket: kytheuri.ToString(key)} 315 // TODO(schroederc): add paging 316 317 // kind -> build_config -> group 318 groups := make(map[string]map[string]*srvpb.PagedCrossReferences_Group) 319 320 var ref *ppb.Reference 321 for refStream(&ref) { 322 kind := refKind(ref) 323 configs, ok := groups[kind] 324 if !ok { 325 configs = make(map[string]*srvpb.PagedCrossReferences_Group) 326 groups[kind] = configs 327 } 328 config := ref.Anchor.BuildConfiguration 329 g, ok := configs[config] 330 if !ok { 331 g = &srvpb.PagedCrossReferences_Group{Kind: kind, BuildConfig: config} 332 configs[config] = g 333 set.Group = append(set.Group, g) 334 } 335 g.Anchor = append(g.Anchor, ref.Anchor) 336 } 337 338 callers := make(map[string]*xspb.CrossReferences_Caller) 339 callsites := make(map[string][]*xspb.CrossReferences_Callsite) 340 var call *xspb.CrossReferences 341 for callStream(&call) { 342 switch e := call.Entry.(type) { 343 case *xspb.CrossReferences_Caller_: 344 callers[kytheuri.ToString(e.Caller.Caller)] = e.Caller 345 case *xspb.CrossReferences_Callsite_: 346 ticket := kytheuri.ToString(e.Callsite.Caller) 347 callsites[ticket] = append(callsites[ticket], e.Callsite) 348 } 349 } 350 for ticket, caller := range callers { 351 for _, site := range callsites[ticket] { 352 kind := callerKinds[site.Kind] 353 configs, ok := groups[kind] 354 if !ok { 355 configs = make(map[string]*srvpb.PagedCrossReferences_Group) 356 groups[kind] = configs 357 } 358 config := site.Location.BuildConfiguration 359 g, ok := configs[config] 360 if !ok { 361 g = &srvpb.PagedCrossReferences_Group{ 362 Kind: kind, 363 BuildConfig: config, 364 } 365 configs[config] = g 366 set.Group = append(set.Group, g) 367 } 368 369 var groupCaller *srvpb.PagedCrossReferences_Caller 370 for _, c := range g.Caller { 371 if c.SemanticCaller == ticket { 372 groupCaller = c 373 break 374 } 375 } 376 if groupCaller == nil { 377 groupCaller = &srvpb.PagedCrossReferences_Caller{ 378 Caller: caller.Location, 379 SemanticCaller: ticket, 380 MarkedSource: caller.MarkedSource, 381 } 382 g.Caller = append(g.Caller, groupCaller) 383 } 384 groupCaller.Callsite = append(groupCaller.Callsite, site.Location) 385 } 386 } 387 388 sort.Slice(set.Group, func(i, j int) bool { 389 return compare.Strings(set.Group[i].BuildConfig, set.Group[j].BuildConfig). 390 AndThen(set.Group[i].Kind, set.Group[j].Kind) == compare.LT 391 }) 392 for _, g := range set.Group { 393 sort.Slice(g.Anchor, func(i, j int) bool { return g.Anchor[i].Ticket < g.Anchor[j].Ticket }) 394 for _, caller := range g.Caller { 395 sort.Slice(caller.Callsite, func(i, j int) bool { return caller.Callsite[i].Ticket < caller.Callsite[j].Ticket }) 396 } 397 } 398 399 emitSet("xrefs:"+set.SourceTicket, set) 400 } 401 402 func keyRef(r *ppb.Reference) (*spb.VName, *ppb.Reference) { 403 return r.Source, &ppb.Reference{ 404 Kind: r.Kind, 405 Anchor: r.Anchor, 406 } 407 } 408 409 func keyCrossRef(xr *xspb.CrossReferences) (*spb.VName, *xspb.CrossReferences) { 410 return xr.Source, &xspb.CrossReferences{Entry: xr.Entry} 411 } 412 413 func (k *KytheBeam) decorationPieces(s beam.Scope) beam.PCollection { 414 decor := beam.ParDo(s, refToDecorPiece, k.References()) 415 416 targets := beam.ParDo(s, targetToFile, decor) 417 bareNodes := beam.ParDo(s, &nodes.Filter{IncludeEdges: []string{}}, k.nodes) 418 419 files := beam.ParDo(s, fileToDecorPiece, k.getFiles()) 420 targetNodes := beam.ParDo(s, nodeToDecorPiece, 421 beam.CoGroupByKey(s, beam.ParDo(s, moveSourceToKey, bareNodes), targets)) 422 defs := beam.ParDo(s, defToDecorPiece, 423 beam.CoGroupByKey(s, k.directDefinitions(), targets)) 424 overrides := k.overrides(targets) 425 decorDiagnostics := k.diagnostics() 426 427 return beam.Flatten(s, decor, files, targetNodes, defs, decorDiagnostics, overrides) 428 } 429 430 func (k *KytheBeam) overrides(targets beam.PCollection) beam.PCollection { 431 s := k.s.Scope("Overrides") 432 overriddenToEdge := beam.Seq(s, k.Nodes(), &nodes.Filter{IncludeEdges: []string{edges.Overrides, edges.Extends, edges.OverridesTransitive, edges.Satisfies}}, nodeToEdges) 433 overridingToDecor := beam.ParDo(s, overriddenToDecor, beam.CoGroupByKey(s, k.directDefinitions(), overriddenToEdge)) 434 return beam.ParDo(s, overridingToFile, beam.CoGroupByKey(s, targets, overridingToDecor)) 435 } 436 437 func overriddenToDecor(overridden *spb.VName, overriddenAnchors func(**srvpb.ExpandedAnchor) bool, edgeStream func(**scpb.Edge) bool, emit func(*spb.VName, *ppb.DecorationPiece)) { 438 var overriddenAnchor *srvpb.ExpandedAnchor 439 var e *scpb.Edge 440 if !overriddenAnchors(&overriddenAnchor) { 441 return 442 } 443 for edgeStream(&e) { 444 var kind srvpb.FileDecorations_Override_Kind 445 edgeKindString := schema.EdgeKindString(e.GetKytheKind()) 446 if edges.IsVariant(edgeKindString, edges.Overrides) { 447 kind = srvpb.FileDecorations_Override_OVERRIDES 448 } else if edges.IsVariant(edgeKindString, edges.Extends) || edges.IsVariant(edgeKindString, edges.Satisfies) { 449 kind = srvpb.FileDecorations_Override_EXTENDS 450 } else { 451 continue 452 } 453 emit(e.Source, &ppb.DecorationPiece{ 454 Piece: &ppb.DecorationPiece_TargetOverride{ 455 TargetOverride: &xspb.FileDecorations_TargetOverride{ 456 Overriding: e.Source, 457 Overridden: e.Target, 458 Kind: kind, 459 OverridingDefinition: overriddenAnchor, 460 }, 461 }, 462 }) 463 } 464 } 465 466 func overridingToFile(target *spb.VName, files func(**spb.VName) bool, overrides func(**ppb.DecorationPiece) bool, emit func(*spb.VName, *ppb.DecorationPiece)) { 467 var file *spb.VName 468 if !files(&file) { 469 return 470 } 471 var override *ppb.DecorationPiece 472 for overrides(&override) { 473 emit(file, override) 474 } 475 } 476 477 func (k *KytheBeam) diagnostics() beam.PCollection { 478 s := k.s.Scope("Diagnostics") 479 diagnostics := beam.Seq(s, k.Nodes(), &nodes.Filter{ 480 FilterByKind: []string{kinds.Diagnostic}, 481 IncludeFacts: []string{facts.Message, facts.Details, facts.ContextURL}, 482 }, nodeToDiagnostic) 483 refTags := beam.ParDo(s, refToTag, k.References()) 484 fileTags := beam.Seq(s, k.Nodes(), &nodes.Filter{ 485 FilterByKind: []string{kinds.File}, 486 IncludeFacts: []string{}, 487 IncludeEdges: []string{edges.Tagged}, 488 }, fileToTags) 489 return beam.ParDo(s, diagToDecor, beam.CoGroupByKey(s, diagnostics, refTags, fileTags)) 490 } 491 492 func fileToTags(n *scpb.Node, emit func(*spb.VName, *spb.VName)) { 493 for _, e := range n.Edge { 494 emit(e.Target, n.Source) 495 } 496 } 497 498 func diagToDecor(src *spb.VName, diagStream func(**cpb.Diagnostic) bool, refTagStream func(**srvpb.ExpandedAnchor) bool, fileTagStream func(**spb.VName) bool, emit func(*spb.VName, *ppb.DecorationPiece)) error { 499 var d *cpb.Diagnostic 500 if !diagStream(&d) { 501 return nil 502 } 503 504 var ref *srvpb.ExpandedAnchor 505 for refTagStream(&ref) { 506 uri, err := kytheuri.Parse(ref.Ticket) 507 if err != nil { 508 return err 509 } 510 file := &spb.VName{ 511 Corpus: uri.Corpus, 512 Root: uri.Root, 513 Path: uri.Path, 514 } 515 diagWithSpan := *d 516 diagWithSpan.Span = ref.Span 517 emit(file, &ppb.DecorationPiece{ 518 Piece: &ppb.DecorationPiece_Diagnostic{ 519 Diagnostic: &diagWithSpan, 520 }, 521 }) 522 } 523 524 var file *spb.VName 525 for fileTagStream(&file) { 526 emit(file, &ppb.DecorationPiece{ 527 Piece: &ppb.DecorationPiece_Diagnostic{Diagnostic: d}, 528 }) 529 } 530 531 return nil 532 } 533 534 func refToTag(r *ppb.Reference, emit func(*spb.VName, *srvpb.ExpandedAnchor)) { 535 if r.GetKytheKind() != scpb.EdgeKind_TAGGED { 536 return 537 } 538 emit(r.Source, r.Anchor) 539 } 540 541 func nodeToDiagnostic(n *scpb.Node) (*spb.VName, *cpb.Diagnostic) { 542 d := &cpb.Diagnostic{} 543 for _, f := range n.Fact { 544 switch f.GetKytheName() { 545 case scpb.FactName_MESSAGE: 546 d.Message = string(f.Value) 547 case scpb.FactName_DETAILS: 548 d.Details = string(f.Value) 549 case scpb.FactName_CONTEXT_URL: 550 d.ContextUrl = string(f.Value) 551 } 552 } 553 return n.Source, d 554 } 555 556 // SplitDecorations returns a columnar Kythe file decorations table derived from 557 // the Kythe input graph. The beam.PCollection has elements of type 558 // KV<[]byte, []byte>. 559 func (k *KytheBeam) SplitDecorations() beam.PCollection { 560 s := k.s.Scope("SplitDecorations") 561 return beam.ParDo(s, encodeDecorPiece, k.decorationPieces(s)) 562 } 563 564 // Decorations returns a Kythe file decorations table derived from the Kythe 565 // input graph. The beam.PCollection has elements of type 566 // KV<string, *srvpb.FileDecorations>. 567 func (k *KytheBeam) Decorations() beam.PCollection { 568 s := k.s.Scope("Decorations") 569 pieces := k.decorationPieces(s) 570 return beam.ParDo(s, &ticketKey{"decor:"}, beam.CombinePerKey(s, &combineDecorPieces{}, pieces)) 571 } 572 573 type ticketKey struct{ Prefix string } 574 575 func (t *ticketKey) ProcessElement(key *spb.VName, val beam.T) (string, beam.T) { 576 return t.Prefix + kytheuri.ToString(key), val 577 } 578 579 func targetToFile(file *spb.VName, p *ppb.DecorationPiece) (*spb.VName, *spb.VName, error) { 580 return p.GetReference().Source, file, nil 581 } 582 583 // combineDecorPieces combines *ppb.DecorationPieces into a single *srvpb.FileDecorations. 584 type combineDecorPieces struct{} 585 586 func (c *combineDecorPieces) CreateAccumulator() *srvpb.FileDecorations { 587 return &srvpb.FileDecorations{} 588 } 589 590 func (c *combineDecorPieces) MergeAccumulators(accum, n *srvpb.FileDecorations) *srvpb.FileDecorations { 591 accum.Decoration = append(accum.Decoration, n.Decoration...) 592 if accum.File == nil { 593 accum.File = n.File 594 } 595 accum.Target = append(accum.Target, n.Target...) 596 accum.TargetDefinitions = append(accum.TargetDefinitions, n.TargetDefinitions...) 597 accum.Diagnostic = append(accum.Diagnostic, n.Diagnostic...) 598 return accum 599 } 600 601 func (c *combineDecorPieces) AddInput(accum *srvpb.FileDecorations, p *ppb.DecorationPiece) *srvpb.FileDecorations { 602 switch p := p.Piece.(type) { 603 case *ppb.DecorationPiece_Reference: 604 ref := p.Reference 605 accum.Decoration = append(accum.Decoration, &srvpb.FileDecorations_Decoration{ 606 Anchor: &srvpb.RawAnchor{ 607 StartOffset: ref.Anchor.Span.Start.ByteOffset, 608 EndOffset: ref.Anchor.Span.End.ByteOffset, 609 610 BuildConfiguration: ref.Anchor.BuildConfiguration, 611 }, 612 Kind: refKind(ref), 613 Target: kytheuri.ToString(ref.Source), 614 }) 615 case *ppb.DecorationPiece_File: 616 accum.File = p.File 617 case *ppb.DecorationPiece_Node: 618 accum.Target = append(accum.Target, convertPipelineNode(p.Node)) 619 case *ppb.DecorationPiece_Definition_: 620 // TODO(schroederc): redesign *srvpb.FileDecorations to not need invasive 621 // changes to add a node's definition 622 def := p.Definition 623 accum.TargetDefinitions = append(accum.TargetDefinitions, def.Definition) 624 // Add a marker to associate the definition and node. ExtractOutput will 625 // later embed the definition within accum.Target/accum.TargetOverride. 626 accum.Target = append(accum.Target, &srvpb.Node{ 627 Ticket: kytheuri.ToString(def.Node), 628 DefinitionLocation: &srvpb.ExpandedAnchor{Ticket: def.Definition.Ticket}, 629 }) 630 case *ppb.DecorationPiece_Diagnostic: 631 accum.Diagnostic = append(accum.Diagnostic, p.Diagnostic) 632 case *ppb.DecorationPiece_TargetOverride: 633 accum.TargetOverride = append(accum.TargetOverride, &srvpb.FileDecorations_Override{ 634 Overriding: kytheuri.ToString(p.TargetOverride.Overriding), 635 Overridden: kytheuri.ToString(p.TargetOverride.Overridden), 636 OverriddenDefinition: p.TargetOverride.OverridingDefinition.Ticket, 637 Kind: p.TargetOverride.Kind, 638 }) 639 default: 640 panic(fmt.Errorf("unhandled DecorationPiece: %T", p)) 641 } 642 return accum 643 } 644 645 func convertPipelineNode(node *scpb.Node) *srvpb.Node { 646 n := &srvpb.Node{Ticket: kytheuri.ToString(node.Source)} 647 if kind := schema.GetNodeKind(node); kind != "" { 648 n.Fact = append(n.Fact, &cpb.Fact{ 649 Name: facts.NodeKind, 650 Value: []byte(kind), 651 }) 652 } 653 if subkind := schema.GetSubkind(node); subkind != "" { 654 n.Fact = append(n.Fact, &cpb.Fact{ 655 Name: facts.Subkind, 656 Value: []byte(subkind), 657 }) 658 } 659 for _, f := range node.Fact { 660 n.Fact = append(n.Fact, &cpb.Fact{ 661 Name: schema.GetFactName(f), 662 Value: f.Value, 663 }) 664 } 665 sort.Slice(n.Fact, func(i, j int) bool { return n.Fact[i].Name < n.Fact[j].Name }) 666 return n 667 } 668 669 func (c *combineDecorPieces) ExtractOutput(fd *srvpb.FileDecorations) *srvpb.FileDecorations { 670 // Embed definitions for Decorations and Overrides 671 for i := len(fd.Target) - 1; i >= 0; i-- { 672 if fd.Target[i].DefinitionLocation == nil { 673 continue 674 } 675 node, def := fd.Target[i].Ticket, fd.Target[i].DefinitionLocation.Ticket 676 fd.Target = append(fd.Target[:i], fd.Target[i+1:]...) 677 678 for _, d := range fd.Decoration { 679 if d.Target == node { 680 d.TargetDefinition = def 681 } 682 } 683 for _, o := range fd.TargetOverride { 684 if o.Overridden == node { 685 o.OverriddenDefinition = def 686 } 687 } 688 } 689 690 sort.Slice(fd.Decoration, func(i, j int) bool { 691 if c := compare.Ints(int(fd.Decoration[i].Anchor.StartOffset), int(fd.Decoration[j].Anchor.StartOffset)); c != compare.EQ { 692 return c == compare.LT 693 } else if c := compare.Ints(int(fd.Decoration[i].Anchor.EndOffset), int(fd.Decoration[j].Anchor.EndOffset)); c != compare.EQ { 694 return c == compare.LT 695 } else if c := compare.Strings(fd.Decoration[i].Kind, fd.Decoration[j].Kind); c != compare.EQ { 696 return c == compare.LT 697 } 698 return fd.Decoration[i].Target < fd.Decoration[j].Target 699 }) 700 sort.Slice(fd.Target, func(i, j int) bool { return fd.Target[i].Ticket < fd.Target[j].Ticket }) 701 sort.Slice(fd.TargetDefinitions, func(i, j int) bool { return fd.TargetDefinitions[i].Ticket < fd.TargetDefinitions[j].Ticket }) 702 703 sort.Slice(fd.Diagnostic, func(i, j int) bool { 704 a, b := fd.Diagnostic[i], fd.Diagnostic[j] 705 return compare.Compare(a.Span.GetStart().GetByteOffset(), b.Span.GetStart().GetByteOffset()). 706 AndThen(a.Span.GetEnd().GetByteOffset(), b.Span.GetEnd().GetByteOffset()). 707 AndThen(a.Message, b.Message) == compare.LT 708 }) 709 return fd 710 } 711 712 func fileToDecorPiece(src *spb.VName, f *srvpb.File) (*spb.VName, *ppb.DecorationPiece) { 713 return src, &ppb.DecorationPiece{Piece: &ppb.DecorationPiece_File{f}} 714 } 715 716 func refToDecorPiece(r *ppb.Reference, emit func(*spb.VName, *ppb.DecorationPiece)) error { 717 if r.GetKytheKind() == scpb.EdgeKind_TAGGED { 718 return nil 719 } 720 p := &ppb.DecorationPiece{ 721 Piece: &ppb.DecorationPiece_Reference{&ppb.Reference{ 722 Source: r.Source, 723 Kind: r.Kind, 724 Anchor: r.Anchor, 725 }}, 726 } 727 file, err := anchorToFileVName(r.Anchor.Ticket) 728 if err != nil { 729 return err 730 } 731 emit(file, p) 732 return nil 733 } 734 735 func anchorToFileVName(anchorTicket string) (*spb.VName, error) { 736 anchor, err := kytheuri.ToVName(anchorTicket) 737 if err != nil { 738 return nil, err 739 } 740 return fileVName(anchor), nil 741 } 742 743 func fileVName(anchor *spb.VName) *spb.VName { 744 return &spb.VName{ 745 Corpus: anchor.Corpus, 746 Root: anchor.Root, 747 Path: anchor.Path, 748 } 749 } 750 751 func nodeToDecorPiece(key *spb.VName, node func(**scpb.Node) bool, file func(**spb.VName) bool, emit func(*spb.VName, *ppb.DecorationPiece)) { 752 var n, singleNode *scpb.Node 753 for node(&n) { 754 singleNode = n 755 } 756 if singleNode == nil { 757 return 758 } 759 760 piece := &ppb.DecorationPiece{ 761 Piece: &ppb.DecorationPiece_Node{&scpb.Node{ 762 Source: key, 763 Kind: singleNode.Kind, 764 Subkind: singleNode.Subkind, 765 Fact: singleNode.Fact, 766 Edge: singleNode.Edge, 767 }}, 768 } 769 770 var f *spb.VName 771 for file(&f) { 772 emit(f, piece) 773 } 774 } 775 776 func defToDecorPiece(node *spb.VName, defs func(**srvpb.ExpandedAnchor) bool, file func(**spb.VName) bool, emit func(*spb.VName, *ppb.DecorationPiece)) { 777 var def *srvpb.ExpandedAnchor 778 for defs(&def) { 779 // TODO(schroederc): select ambiguous definition better 780 break // pick first known definition 781 } 782 if def == nil { 783 return 784 } 785 piece := &ppb.DecorationPiece{ 786 Piece: &ppb.DecorationPiece_Definition_{&ppb.DecorationPiece_Definition{ 787 Node: node, 788 Definition: def, 789 }}, 790 } 791 var f *spb.VName 792 for file(&f) { 793 emit(f, piece) 794 } 795 } 796 797 // Nodes returns all *scpb.Nodes from the Kythe input graph. 798 func (k *KytheBeam) Nodes() beam.PCollection { return k.nodes } 799 800 // References returns all derived *ppb.References from the Kythe input graph. 801 func (k *KytheBeam) References() beam.PCollection { 802 if k.refs.IsValid() { 803 return k.refs 804 } 805 s := k.s.Scope("References") 806 anchors := beam.ParDo(s, keyByPath, beam.ParDo(s, 807 &nodes.Filter{ 808 FilterByKind: []string{kinds.Anchor}, 809 IncludeFacts: []string{ 810 facts.AnchorStart, facts.AnchorEnd, 811 facts.SnippetStart, facts.SnippetEnd, 812 facts.BuildConfig, 813 }, 814 }, k.nodes)) 815 k.refs = beam.ParDo(s, toRefs, beam.CoGroupByKey(s, k.getFiles(), anchors)) 816 return k.refs 817 } 818 819 func (k *KytheBeam) getFiles() beam.PCollection { 820 if !k.files.IsValid() { 821 fileNodes := beam.ParDo(k.s, 822 &nodes.Filter{ 823 FilterByKind: []string{kinds.File}, 824 IncludeFacts: []string{facts.Text, facts.TextEncoding}, 825 }, k.nodes) 826 k.files = beam.ParDo(k.s, toFiles, fileNodes) 827 } 828 return k.files 829 } 830 831 func keyByPath(n *scpb.Node) (*spb.VName, *scpb.Node) { 832 return &spb.VName{Corpus: n.Source.Corpus, Root: n.Source.Root, Path: n.Source.Path}, n 833 } 834 835 func toRefs(p *spb.VName, file func(**srvpb.File) bool, anchor func(**scpb.Node) bool, emit func(*ppb.Reference)) error { 836 var f *srvpb.File 837 if !file(&f) { 838 return nil 839 } 840 return normalizeAnchors(f, anchor, emit) 841 } 842 843 func toFiles(n *scpb.Node) (*spb.VName, *srvpb.File) { 844 var f srvpb.File 845 for _, fact := range n.Fact { 846 switch fact.GetKytheName() { 847 case scpb.FactName_TEXT: 848 f.Text = fact.Value 849 case scpb.FactName_TEXT_ENCODING: 850 f.Encoding = string(fact.Value) 851 } 852 } 853 return n.Source, &f 854 } 855 856 func normalizeAnchors(file *srvpb.File, anchor func(**scpb.Node) bool, emit func(*ppb.Reference)) error { 857 norm := span.NewNormalizer(file.Text) 858 var n *scpb.Node 859 for anchor(&n) { 860 raw, err := toRawAnchor(n) 861 if err != nil { 862 return err 863 } 864 a, err := assemble.ExpandAnchor(raw, file, norm, "") 865 if err != nil { 866 log.Errorf("expanding anchor {%+v}: %v", raw, err) 867 break 868 } 869 870 var parent *spb.VName 871 for _, e := range n.Edge { 872 if e.GetKytheKind() == scpb.EdgeKind_CHILD_OF { 873 // There should only be a single parent for each anchor. 874 parent = e.Target 875 break 876 } 877 } 878 879 for _, e := range n.Edge { 880 if e.GetKytheKind() == scpb.EdgeKind_CHILD_OF { 881 continue 882 } 883 ref := &ppb.Reference{ 884 Source: e.Target, 885 Anchor: a, 886 Scope: parent, 887 } 888 if k := e.GetKytheKind(); k == scpb.EdgeKind_UNKNOWN_EDGE_KIND { 889 ref.Kind = &ppb.Reference_GenericKind{e.GetGenericKind()} 890 } else { 891 ref.Kind = &ppb.Reference_KytheKind{k} 892 } 893 emit(ref) 894 } 895 } 896 return nil 897 } 898 899 func toRawAnchor(n *scpb.Node) (*srvpb.RawAnchor, error) { 900 var a srvpb.RawAnchor 901 for _, f := range n.Fact { 902 var err error 903 switch f.GetKytheName() { 904 case scpb.FactName_BUILD_CONFIG: 905 a.BuildConfiguration = string(f.Value) 906 case scpb.FactName_LOC_START: 907 a.StartOffset, err = factValueToInt(f) 908 case scpb.FactName_LOC_END: 909 a.EndOffset, err = factValueToInt(f) 910 case scpb.FactName_SNIPPET_START: 911 a.SnippetStart, err = factValueToInt(f) 912 case scpb.FactName_SNIPPET_END: 913 a.SnippetEnd, err = factValueToInt(f) 914 default: 915 return nil, fmt.Errorf("unhandled fact: %v", f) 916 } 917 if err != nil { 918 return nil, err 919 } 920 } 921 a.Ticket = kytheuri.ToString(n.Source) 922 return &a, nil 923 } 924 925 func factValueToInt(f *scpb.Fact) (int32, error) { 926 i, err := strconv.Atoi(string(f.Value)) 927 if err != nil { 928 return 0, fmt.Errorf("invalid integer fact value for %q: %v", schema.GetFactName(f), err) 929 } 930 return int32(i), nil 931 } 932 933 func moveSourceToKey(n *scpb.Node) (*spb.VName, *scpb.Node) { 934 return n.Source, &scpb.Node{ 935 Kind: n.Kind, 936 Subkind: n.Subkind, 937 Fact: n.Fact, 938 Edge: n.Edge, 939 } 940 } 941 942 func (k *KytheBeam) directDefinitions() beam.PCollection { 943 s := k.s.Scope("DirectDefinitions") 944 return beam.ParDo(s, toDefinition, k.References()) 945 } 946 947 func toDefinition(r *ppb.Reference, emit func(*spb.VName, *srvpb.ExpandedAnchor)) error { 948 if edges.IsVariant(refKind(r), edges.Defines) { 949 emit(r.Source, r.Anchor) 950 } 951 return nil 952 } 953 954 func refKind(r *ppb.Reference) string { 955 if k := r.GetKytheKind(); k != scpb.EdgeKind_UNKNOWN_EDGE_KIND { 956 return schema.EdgeKindString(k) 957 } 958 return r.GetGenericKind() 959 } 960 961 // Edges returns a Kythe edges table derived from the Kythe input graph. The beam.PCollections have 962 // elements of type KV<string, *srvpb.PagedEdgeSet> and KV<string, *srvpb.EdgePage>, respectively. 963 func (k *KytheBeam) Edges() (beam.PCollection, beam.PCollection) { 964 s := k.s.Scope("Edges") 965 966 nodes := beam.ParDo(s, moveSourceToKey, k.nodes) 967 edges := beam.ParDo(s, reverseEdge, beam.CoGroupByKey(s, nodes, beam.ParDo(s, nodeToEdges, k.nodes))) 968 rev := beam.ParDo(s, nodeToReverseEdges, k.nodes) 969 970 return beam.ParDo2(s, groupEdges, beam.CoGroupByKey(s, nodes, edges, rev)) 971 } 972 973 // edgeRelations returns a beam.PCollection of gspb.Edges for all Kythe graph 974 // relations. 975 func (k *KytheBeam) edgeRelations() beam.PCollection { 976 if !k.edges.IsValid() { 977 s := k.s.Scope("Relations") 978 979 nodeEdges := beam.Seq(s, k.nodes, filterAnchorNodes, &nodes.Filter{IncludeFacts: []string{}}) 980 sourceNodes := beam.ParDo(s, moveSourceToKey, k.nodes) 981 982 targetNodes := beam.ParDo(s, encodeEdgeTarget, beam.CoGroupByKey(s, 983 sourceNodes, 984 beam.ParDo(s, splitEdge, filter.Distinct(s, beam.ParDo(s, edgeTargets, nodeEdges))))) 985 edges := beam.ParDo(s, encodeEdges, nodeEdges) 986 987 k.edges = beam.Flatten(s, edges, targetNodes) 988 } 989 return k.edges 990 } 991 992 // SplitEdges returns a columnar Kythe edges table derived from the Kythe input 993 // graph. The beam.PCollection have elements of type KV<[]byte, []byte>. 994 func (k *KytheBeam) SplitEdges() beam.PCollection { 995 s := k.s.Scope("SplitEdges") 996 997 idx := beam.ParDo(s, combineEdgesIndex, 998 // TODO(schroederc): counts; also needed for presence with only rev edges 999 beam.ParDo(s, keyNode, beam.ParDo(s, &nodes.Filter{IncludeEdges: []string{}}, k.Nodes()))) 1000 1001 return beam.ParDo(s, encodeEdgesEntry, beam.Flatten(s, idx, k.edgeRelations())) 1002 } 1003 1004 func filterAnchorNodes(n *scpb.Node, emit func(*scpb.Node)) { 1005 if n.GetKytheKind() == scpb.NodeKind_ANCHOR { 1006 return 1007 } 1008 emit(n) 1009 } 1010 1011 func edgeTargets(n *scpb.Node, emit func(*scpb.Edge)) { 1012 for _, e := range n.Edge { 1013 emit(&scpb.Edge{Source: n.Source, Target: e.Target}) 1014 emit(&scpb.Edge{Target: n.Source, Source: e.Target}) 1015 } 1016 } 1017 1018 func splitEdge(e *scpb.Edge) (*spb.VName, *spb.VName) { return e.Source, e.Target } 1019 1020 func combineEdgesIndex(src *spb.VName, node *scpb.Node) *gspb.Edges { 1021 return &gspb.Edges{ 1022 Source: src, 1023 Entry: &gspb.Edges_Index_{&gspb.Edges_Index{ 1024 Node: node, 1025 }}, 1026 } 1027 } 1028 1029 // nodeToReverseEdges emits an *scpb.Edge with its SourceNode populated for each of n's edges. The 1030 // key for each *scpb.Edge is its Target VName. 1031 func nodeToReverseEdges(n *scpb.Node, emit func(*spb.VName, *scpb.Edge)) { 1032 node := nodeWithoutEdges(n) 1033 for _, e := range n.Edge { 1034 emit(e.Target, &scpb.Edge{ 1035 SourceNode: node, 1036 Target: e.Target, 1037 Kind: e.Kind, 1038 Ordinal: e.Ordinal, 1039 }) 1040 } 1041 } 1042 1043 // nodeToEdges emits an *scpb.Edge for each of n's edges. The key for each *scpb.Edge is its Target 1044 // VName. 1045 func nodeToEdges(n *scpb.Node, emit func(*spb.VName, *scpb.Edge)) { 1046 for _, e := range n.Edge { 1047 emit(e.Target, &scpb.Edge{ 1048 Source: n.Source, 1049 Target: e.Target, 1050 Kind: e.Kind, 1051 Ordinal: e.Ordinal, 1052 }) 1053 } 1054 } 1055 1056 func nodeWithoutEdges(n *scpb.Node) *scpb.Node { 1057 return &scpb.Node{ 1058 Source: n.Source, 1059 Kind: n.Kind, 1060 Subkind: n.Subkind, 1061 Fact: n.Fact, 1062 } 1063 } 1064 1065 // reverseEdge emits the reverse of each *scpb.Edge, embedding the associated TargetNode. 1066 func reverseEdge(src *spb.VName, nodeStream func(**scpb.Node) bool, edgeStream func(**scpb.Edge) bool, emit func(*spb.VName, *scpb.Edge)) { 1067 var node *scpb.Node 1068 if !nodeStream(&node) { 1069 node = &scpb.Node{} 1070 } else { 1071 node = nodeWithoutEdges(node) 1072 } 1073 node.Source = src 1074 1075 var e *scpb.Edge 1076 for edgeStream(&e) { 1077 emit(e.Source, &scpb.Edge{ 1078 Source: e.Source, 1079 TargetNode: node, 1080 Kind: e.Kind, 1081 Ordinal: e.Ordinal, 1082 }) 1083 } 1084 } 1085 1086 // groupEdges emits *srvpb.PagedEdgeSets and *srvpb.EdgePages for a node and its forward/reverse 1087 // edges. 1088 func groupEdges(src *spb.VName, nodeStream func(**scpb.Node) bool, edgeStream, revStream func(**scpb.Edge) bool, emitSet func(string, *srvpb.PagedEdgeSet), emitPage func(string, *srvpb.EdgePage)) { 1089 set := &srvpb.PagedEdgeSet{} 1090 // TODO(schroederc): paging 1091 1092 var node *scpb.Node 1093 if nodeStream(&node) { 1094 node.Source = src 1095 set.Source = convertPipelineNode(node) 1096 } else { 1097 set.Source = &srvpb.Node{Ticket: kytheuri.ToString(src)} 1098 } 1099 1100 groups := make(map[string]*srvpb.EdgeGroup) 1101 1102 var edge *scpb.Edge 1103 for edgeStream(&edge) { 1104 kind := schema.GetEdgeKind(edge) 1105 g, ok := groups[kind] 1106 if !ok { 1107 g = &srvpb.EdgeGroup{Kind: kind} 1108 groups[kind] = g 1109 set.Group = append(set.Group, g) 1110 } 1111 g.Edge = append(g.Edge, &srvpb.EdgeGroup_Edge{ 1112 Target: convertPipelineNode(edge.TargetNode), 1113 Ordinal: edge.Ordinal, 1114 }) 1115 } 1116 for revStream(&edge) { 1117 kind := "%" + schema.GetEdgeKind(edge) // encode reverse edge kind 1118 g, ok := groups[kind] 1119 if !ok { 1120 g = &srvpb.EdgeGroup{Kind: kind} 1121 groups[kind] = g 1122 set.Group = append(set.Group, g) 1123 } 1124 g.Edge = append(g.Edge, &srvpb.EdgeGroup_Edge{ 1125 Target: convertPipelineNode(edge.SourceNode), 1126 Ordinal: edge.Ordinal, 1127 }) 1128 } 1129 1130 sort.Slice(set.Group, func(i, j int) bool { return set.Group[i].Kind < set.Group[j].Kind }) 1131 for _, g := range set.Group { 1132 sort.Slice(g.Edge, func(i, j int) bool { 1133 return compare.Compare(g.Edge[i].Ordinal, g.Edge[j].Ordinal). 1134 AndThen(g.Edge[i].Target.Ticket, g.Edge[j].Target.Ticket) == compare.LT 1135 }) 1136 } 1137 1138 emitSet("edgeSets:"+set.Source.Ticket, set) 1139 } 1140 1141 func (k *KytheBeam) getMarkedSources() beam.PCollection { 1142 if !k.markedSources.IsValid() { 1143 s := k.s.Scope("MarkedSources") 1144 k.markedSources = beam.Seq(s, k.nodes, &nodes.Filter{ 1145 IncludeFacts: []string{facts.Code}, 1146 IncludeEdges: []string{}, 1147 }, parseMarkedSource) 1148 } 1149 return k.markedSources 1150 } 1151 1152 // Documents returns a Kythe documentation table derived from the Kythe input 1153 // graph. The beam.PCollection has elements of type KV<string, 1154 // *srvpb.Document>. 1155 func (k *KytheBeam) Documents() beam.PCollection { 1156 s := k.s.Scope("Documents") 1157 1158 docs := beam.Seq(s, k.nodes, &nodes.Filter{ 1159 FilterByKind: []string{kinds.Doc}, 1160 IncludeFacts: []string{facts.Text}, 1161 IncludeEdges: []string{edges.Documents}, 1162 }, nodeToDocs) 1163 markedSources := k.getMarkedSources() 1164 children := beam.Seq(s, k.nodes, &nodes.Filter{ 1165 IncludeFacts: []string{}, 1166 IncludeEdges: []string{edges.ChildOf}, 1167 }, nodeToChildren) 1168 1169 return beam.ParDo(s, completeDocument, beam.CoGroupByKey(s, docs, markedSources, children)) 1170 } 1171 1172 // completeDocument emits a single *srvpb.Document per *spb.VName source. 1173 func completeDocument(key *spb.VName, docStream func(**srvpb.Document) bool, msStream func(**cpb.MarkedSource) bool, childStream func(**spb.VName) bool, emit func(string, *srvpb.Document)) { 1174 var doc *srvpb.Document 1175 if !docStream(&doc) { 1176 return 1177 } 1178 doc.Ticket = kytheuri.ToString(key) 1179 1180 msStream(&doc.MarkedSource) // embed MarkedSource, if available 1181 1182 var child *spb.VName 1183 for childStream(&child) { 1184 doc.ChildTicket = append(doc.ChildTicket, kytheuri.ToString(child)) 1185 } 1186 sort.Strings(doc.ChildTicket) 1187 1188 // TODO(schroederc): add definition Links 1189 emit("docs:"+doc.Ticket, doc) 1190 } 1191 1192 // nodeToDocs emits a (*spb.VName, *srvpb.Document) pair for each 1193 // /kythe/edge/documents edges from the given `doc` *scpb.Node. 1194 func nodeToDocs(n *scpb.Node, emit func(*spb.VName, *srvpb.Document)) { 1195 d := &srvpb.Document{} 1196 for _, f := range n.Fact { 1197 if f.GetKytheName() == scpb.FactName_TEXT { 1198 d.RawText = string(f.Value) 1199 break 1200 } 1201 } 1202 1203 for _, e := range n.Edge { 1204 if e.GetKytheKind() == scpb.EdgeKind_DOCUMENTS { 1205 emit(e.Target, d) 1206 } 1207 } 1208 } 1209 1210 // parseMarkedSource parses the /kythe/code fact for each *scpb.Node. 1211 func parseMarkedSource(n *scpb.Node, emit func(*spb.VName, *cpb.MarkedSource)) error { 1212 for _, f := range n.Fact { 1213 if f.GetKytheName() == scpb.FactName_CODE { 1214 var ms cpb.MarkedSource 1215 if err := proto.Unmarshal(f.Value, &ms); err != nil { 1216 return err 1217 } 1218 emit(n.Source, &ms) 1219 break 1220 } 1221 } 1222 return nil 1223 } 1224 1225 // nodeToChildren emits a (parent, child) pair for each /kythe/edge/childof edge 1226 // per *scpb.Node. 1227 func nodeToChildren(n *scpb.Node, emit func(*spb.VName, *spb.VName)) { 1228 for _, e := range n.Edge { 1229 if e.GetKytheKind() == scpb.EdgeKind_CHILD_OF { 1230 emit(e.Target, n.Source) // parent -> child 1231 } 1232 } 1233 }