kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/serving/pipeline/beam.go (about)

     1  /*
     2   * Copyright 2018 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package pipeline
    18  
    19  import (
    20  	"fmt"
    21  	"reflect"
    22  	"sort"
    23  	"strconv"
    24  
    25  	"kythe.io/kythe/go/serving/pipeline/nodes"
    26  	"kythe.io/kythe/go/serving/xrefs/assemble"
    27  	"kythe.io/kythe/go/util/compare"
    28  	"kythe.io/kythe/go/util/kytheuri"
    29  	"kythe.io/kythe/go/util/log"
    30  	"kythe.io/kythe/go/util/schema"
    31  	"kythe.io/kythe/go/util/schema/edges"
    32  	"kythe.io/kythe/go/util/schema/facts"
    33  	kinds "kythe.io/kythe/go/util/schema/nodes"
    34  	"kythe.io/kythe/go/util/span"
    35  
    36  	"github.com/apache/beam/sdks/go/pkg/beam"
    37  	"github.com/apache/beam/sdks/go/pkg/beam/transforms/filter"
    38  	"google.golang.org/protobuf/proto"
    39  
    40  	cpb "kythe.io/kythe/proto/common_go_proto"
    41  	gspb "kythe.io/kythe/proto/graph_serving_go_proto"
    42  	ppb "kythe.io/kythe/proto/pipeline_go_proto"
    43  	scpb "kythe.io/kythe/proto/schema_go_proto"
    44  	srvpb "kythe.io/kythe/proto/serving_go_proto"
    45  	spb "kythe.io/kythe/proto/storage_go_proto"
    46  	xspb "kythe.io/kythe/proto/xref_serving_go_proto"
    47  )
    48  
    49  func init() {
    50  	beam.RegisterFunction(bareRevEdge)
    51  	beam.RegisterFunction(callEdge)
    52  	beam.RegisterFunction(combineEdgesIndex)
    53  	beam.RegisterFunction(completeDocument)
    54  	beam.RegisterFunction(constructCaller)
    55  	beam.RegisterFunction(defToDecorPiece)
    56  	beam.RegisterFunction(diagToDecor)
    57  	beam.RegisterFunction(edgeTargets)
    58  	beam.RegisterFunction(edgeToCrossRefRelation)
    59  	beam.RegisterFunction(emitRelatedDefs)
    60  	beam.RegisterFunction(fileToDecorPiece)
    61  	beam.RegisterFunction(fileToTags)
    62  	beam.RegisterFunction(filterAnchorNodes)
    63  	beam.RegisterFunction(groupCrossRefs)
    64  	beam.RegisterFunction(groupEdges)
    65  	beam.RegisterFunction(keyByPath)
    66  	beam.RegisterFunction(keyCrossRef)
    67  	beam.RegisterFunction(keyNode)
    68  	beam.RegisterFunction(keyRef)
    69  	beam.RegisterFunction(moveSourceToKey)
    70  	beam.RegisterFunction(nodeToChildren)
    71  	beam.RegisterFunction(nodeToDecorPiece)
    72  	beam.RegisterFunction(nodeToDiagnostic)
    73  	beam.RegisterFunction(nodeToDocs)
    74  	beam.RegisterFunction(nodeToEdges)
    75  	beam.RegisterFunction(nodeToReverseEdges)
    76  	beam.RegisterFunction(overriddenToDecor)
    77  	beam.RegisterFunction(overridingToFile)
    78  	beam.RegisterFunction(parseMarkedSource)
    79  	beam.RegisterFunction(refToCallsite)
    80  	beam.RegisterFunction(refToCrossRef)
    81  	beam.RegisterFunction(refToDecorPiece)
    82  	beam.RegisterFunction(refToTag)
    83  	beam.RegisterFunction(reverseEdge)
    84  	beam.RegisterFunction(splitEdge)
    85  	beam.RegisterFunction(targetToFile)
    86  	beam.RegisterFunction(toDefinition)
    87  	beam.RegisterFunction(toFiles)
    88  	beam.RegisterFunction(toRefs)
    89  
    90  	beam.RegisterType(reflect.TypeOf((*combineDecorPieces)(nil)).Elem())
    91  	beam.RegisterType(reflect.TypeOf((*ticketKey)(nil)).Elem())
    92  
    93  	beam.RegisterType(reflect.TypeOf((*cpb.Diagnostic)(nil)).Elem())
    94  	beam.RegisterType(reflect.TypeOf((*cpb.MarkedSource)(nil)).Elem())
    95  	beam.RegisterType(reflect.TypeOf((*ppb.DecorationPiece)(nil)).Elem())
    96  	beam.RegisterType(reflect.TypeOf((*ppb.Reference)(nil)).Elem())
    97  	beam.RegisterType(reflect.TypeOf((*scpb.Edge)(nil)).Elem())
    98  	beam.RegisterType(reflect.TypeOf((*scpb.Node)(nil)).Elem())
    99  	beam.RegisterType(reflect.TypeOf((*spb.Entry)(nil)).Elem())
   100  	beam.RegisterType(reflect.TypeOf((*spb.VName)(nil)).Elem())
   101  	beam.RegisterType(reflect.TypeOf((*srvpb.CorpusRoots)(nil)).Elem())
   102  	beam.RegisterType(reflect.TypeOf((*srvpb.Document)(nil)).Elem())
   103  	beam.RegisterType(reflect.TypeOf((*srvpb.EdgePage)(nil)).Elem())
   104  	beam.RegisterType(reflect.TypeOf((*srvpb.ExpandedAnchor)(nil)).Elem())
   105  	beam.RegisterType(reflect.TypeOf((*srvpb.File)(nil)).Elem())
   106  	beam.RegisterType(reflect.TypeOf((*srvpb.FileDecorations)(nil)).Elem())
   107  	beam.RegisterType(reflect.TypeOf((*srvpb.FileDirectory)(nil)).Elem())
   108  	beam.RegisterType(reflect.TypeOf((*srvpb.PagedCrossReferences)(nil)).Elem())
   109  	beam.RegisterType(reflect.TypeOf((*srvpb.PagedCrossReferences_Page)(nil)).Elem())
   110  	beam.RegisterType(reflect.TypeOf((*srvpb.PagedEdgeSet)(nil)).Elem())
   111  }
   112  
// KytheBeam controls the lifetime and generation of PCollections in the Kythe
// pipeline.
//
// The nodes collection is supplied by the constructor.  The refs and files
// collections are built lazily by References and getFiles and are reused on
// later calls.
type KytheBeam struct {
	// s is the root scope; each stage derives a named sub-scope from it.
	s beam.Scope

	fileVNames beam.PCollection // *spb.VName
	nodes      beam.PCollection // *scpb.Node
	files      beam.PCollection // KV<*spb.VName, *srvpb.File>
	refs       beam.PCollection // *ppb.Reference
	edges      beam.PCollection // *gspb.Edges

	markedSources beam.PCollection // KV<*spb.VName, *cpb.MarkedSource>

	anchorBuildConfigs beam.PCollection // KV<*spb.VName, string>
}
   128  
   129  // FromNodes creates a KytheBeam pipeline from an input collection of
   130  // *spb.Nodes.
   131  func FromNodes(s beam.Scope, nodes beam.PCollection) *KytheBeam {
   132  	return &KytheBeam{s: s, nodes: nodes}
   133  }
   134  
   135  // FromEntries creates a KytheBeam pipeline from an input collection of
   136  // *spb.Entry messages.
   137  func FromEntries(s beam.Scope, entries beam.PCollection) *KytheBeam {
   138  	return FromNodes(s, nodes.FromEntries(s, entries))
   139  }
   140  
   141  func keyNode(n *scpb.Node) (*spb.VName, *scpb.Node) { return n.Source, n }
   142  
   143  // SplitCrossReferences returns a columnar Kythe cross-references table derived
   144  // from the Kythe input graph.  The beam.PCollection has elements of type
   145  // KV<[]byte, []byte>.
   146  func (k *KytheBeam) SplitCrossReferences() beam.PCollection {
   147  	s := k.s.Scope("SplitCrossReferences")
   148  
   149  	refs := beam.ParDo(s, refToCrossRef, k.References())
   150  	idx := beam.ParDo(s, nodeToCrossRef, beam.CoGroupByKey(s,
   151  		beam.ParDo(s, keyNode, k.Nodes()),
   152  		k.getMarkedSources(),
   153  		// TODO(schroederc): merge_with
   154  	))
   155  
   156  	callgraph := k.callGraph()
   157  
   158  	edges := k.edgeRelations()
   159  	relatedDefs := beam.ParDo(s, emitRelatedDefs, beam.CoGroupByKey(s,
   160  		k.directDefinitions(),
   161  		beam.ParDo(s, splitEdge, filter.Distinct(s, beam.ParDo(s, bareRevEdge, edges))),
   162  	))
   163  	relations := beam.ParDo(s, edgeToCrossRefRelation, edges)
   164  
   165  	return beam.ParDo(s, encodeCrossRef, beam.Flatten(s,
   166  		idx,
   167  		refs,
   168  		relations,
   169  		relatedDefs,
   170  		callgraph,
   171  	))
   172  }
   173  
   174  func (k *KytheBeam) callGraph() beam.PCollection {
   175  	s := k.s.Scope("CallGraph")
   176  	callsites := beam.ParDo(s, refToCallsite, k.References())
   177  	// TODO(schroederc): override callers
   178  	callers := beam.ParDo(s, constructCaller, beam.CoGroupByKey(s,
   179  		k.directDefinitions(),
   180  		k.getMarkedSources(),
   181  		beam.ParDo(s, splitEdge, filter.Distinct(s, beam.ParDo(s, callEdge, callsites))),
   182  	))
   183  	return beam.Flatten(s, callsites, callers)
   184  }
   185  
   186  func emitRelatedDefs(target *spb.VName, defStream func(**srvpb.ExpandedAnchor) bool, srcStream func(**spb.VName) bool, emit func(*xspb.CrossReferences)) {
   187  	var def *srvpb.ExpandedAnchor
   188  	if !defStream(&def) {
   189  		return // no related node definition found
   190  	}
   191  	nodeDef := &xspb.CrossReferences_NodeDefinition_{&xspb.CrossReferences_NodeDefinition{
   192  		Node:     target,
   193  		Location: def,
   194  	}}
   195  
   196  	var src *spb.VName
   197  	for srcStream(&src) {
   198  		emit(&xspb.CrossReferences{Source: src, Entry: nodeDef})
   199  	}
   200  }
   201  
   202  func bareRevEdge(eg *gspb.Edges, emit func(*scpb.Edge)) error {
   203  	switch e := eg.Entry.(type) {
   204  	case *gspb.Edges_Edge_:
   205  		edge := e.Edge
   206  		emit(&scpb.Edge{Target: eg.Source, Source: edge.Target})
   207  	}
   208  	return nil
   209  }
   210  
   211  func constructCaller(caller *spb.VName, defStream func(**srvpb.ExpandedAnchor) bool, msStream func(**cpb.MarkedSource) bool, calleeStream func(**spb.VName) bool, emit func(*xspb.CrossReferences)) {
   212  	var def *srvpb.ExpandedAnchor
   213  	if !defStream(&def) {
   214  		return // no caller definition found
   215  	}
   216  	var ms *cpb.MarkedSource
   217  	for msStream(&ms) {
   218  		break
   219  	}
   220  
   221  	var callee *spb.VName
   222  	for calleeStream(&callee) {
   223  		emit(&xspb.CrossReferences{
   224  			Source: callee,
   225  			Entry: &xspb.CrossReferences_Caller_{&xspb.CrossReferences_Caller{
   226  				Caller:       caller,
   227  				Location:     def,
   228  				MarkedSource: ms,
   229  			}},
   230  		})
   231  	}
   232  }
   233  
   234  func refToCallsite(r *ppb.Reference, emit func(*xspb.CrossReferences)) {
   235  	if r.GetKytheKind() != scpb.EdgeKind_REF_CALL || r.Scope == nil {
   236  		return
   237  	}
   238  	emit(&xspb.CrossReferences{
   239  		Source: r.Source,
   240  		Entry: &xspb.CrossReferences_Callsite_{&xspb.CrossReferences_Callsite{
   241  			Kind:     xspb.CrossReferences_Callsite_DIRECT,
   242  			Caller:   r.Scope,
   243  			Location: r.Anchor,
   244  		}},
   245  	})
   246  }
   247  
   248  func callEdge(x *xspb.CrossReferences) *scpb.Edge {
   249  	return &scpb.Edge{Source: x.GetCallsite().GetCaller(), Target: x.GetSource()}
   250  }
   251  
   252  func edgeToCrossRefRelation(eg *gspb.Edges, emit func(*xspb.CrossReferences)) error {
   253  	switch e := eg.Entry.(type) {
   254  	case *gspb.Edges_Edge_:
   255  		edge := e.Edge
   256  		r := &xspb.CrossReferences_Relation{
   257  			Ordinal: edge.Ordinal,
   258  			Reverse: edge.Reverse,
   259  			Node:    edge.Target,
   260  		}
   261  		if k := edge.GetGenericKind(); k != "" {
   262  			r.Kind = &xspb.CrossReferences_Relation_GenericKind{k}
   263  		} else {
   264  			r.Kind = &xspb.CrossReferences_Relation_KytheKind{edge.GetKytheKind()}
   265  		}
   266  		emit(&xspb.CrossReferences{
   267  			Source: eg.Source,
   268  			Entry:  &xspb.CrossReferences_Relation_{r},
   269  		})
   270  		return nil
   271  	case *gspb.Edges_Target_:
   272  		target := e.Target
   273  		emit(&xspb.CrossReferences{
   274  			Source: eg.Source,
   275  			Entry: &xspb.CrossReferences_RelatedNode_{&xspb.CrossReferences_RelatedNode{
   276  				Node: target.Node,
   277  			}},
   278  		})
   279  		return nil
   280  	default:
   281  		return fmt.Errorf("unexpected Edges entry: %T", e)
   282  	}
   283  }
   284  
   285  // CrossReferences returns a Kythe file decorations table derived from the Kythe
   286  // input graph.  The beam.PCollections have elements of type
   287  // KV<string, *srvpb.PagedCrossReferences> and
   288  // KV<string, *srvpb.PagedCrossReferences_Page>, respectively.
   289  func (k *KytheBeam) CrossReferences() (sets, pages beam.PCollection) {
   290  	s := k.s.Scope("CrossReferences")
   291  	refs := beam.CoGroupByKey(s,
   292  		beam.ParDo(s, keyRef, k.References()),
   293  		beam.ParDo(s, keyCrossRef, k.callGraph()),
   294  	)
   295  	// TODO(schroederc): related nodes
   296  	// TODO(schroederc): MarkedSource
   297  	// TODO(schroederc): source_node
   298  	return beam.ParDo2(s, groupCrossRefs, refs)
   299  }
   300  
// callerKinds maps each callsite kind to the cross-reference group kind string
// under which groupCrossRefs files that callsite's caller.
var callerKinds = map[xspb.CrossReferences_Callsite_Kind]string{
	xspb.CrossReferences_Callsite_DIRECT:   "#internal/ref/call/direct",
	xspb.CrossReferences_Callsite_OVERRIDE: "#internal/ref/call/override",
}
   305  
   306  // groupCrossRefs emits *srvpb.PagedCrossReferences and *srvpb.PagedCrossReferences_Pages for a
   307  // single node's collection of *ppb.References and callsites.
   308  func groupCrossRefs(
   309  	key *spb.VName,
   310  	refStream func(**ppb.Reference) bool,
   311  	callStream func(**xspb.CrossReferences) bool,
   312  	emitSet func(string, *srvpb.PagedCrossReferences),
   313  	emitPage func(string, *srvpb.PagedCrossReferences_Page)) {
   314  	set := &srvpb.PagedCrossReferences{SourceTicket: kytheuri.ToString(key)}
   315  	// TODO(schroederc): add paging
   316  
   317  	// kind -> build_config -> group
   318  	groups := make(map[string]map[string]*srvpb.PagedCrossReferences_Group)
   319  
   320  	var ref *ppb.Reference
   321  	for refStream(&ref) {
   322  		kind := refKind(ref)
   323  		configs, ok := groups[kind]
   324  		if !ok {
   325  			configs = make(map[string]*srvpb.PagedCrossReferences_Group)
   326  			groups[kind] = configs
   327  		}
   328  		config := ref.Anchor.BuildConfiguration
   329  		g, ok := configs[config]
   330  		if !ok {
   331  			g = &srvpb.PagedCrossReferences_Group{Kind: kind, BuildConfig: config}
   332  			configs[config] = g
   333  			set.Group = append(set.Group, g)
   334  		}
   335  		g.Anchor = append(g.Anchor, ref.Anchor)
   336  	}
   337  
   338  	callers := make(map[string]*xspb.CrossReferences_Caller)
   339  	callsites := make(map[string][]*xspb.CrossReferences_Callsite)
   340  	var call *xspb.CrossReferences
   341  	for callStream(&call) {
   342  		switch e := call.Entry.(type) {
   343  		case *xspb.CrossReferences_Caller_:
   344  			callers[kytheuri.ToString(e.Caller.Caller)] = e.Caller
   345  		case *xspb.CrossReferences_Callsite_:
   346  			ticket := kytheuri.ToString(e.Callsite.Caller)
   347  			callsites[ticket] = append(callsites[ticket], e.Callsite)
   348  		}
   349  	}
   350  	for ticket, caller := range callers {
   351  		for _, site := range callsites[ticket] {
   352  			kind := callerKinds[site.Kind]
   353  			configs, ok := groups[kind]
   354  			if !ok {
   355  				configs = make(map[string]*srvpb.PagedCrossReferences_Group)
   356  				groups[kind] = configs
   357  			}
   358  			config := site.Location.BuildConfiguration
   359  			g, ok := configs[config]
   360  			if !ok {
   361  				g = &srvpb.PagedCrossReferences_Group{
   362  					Kind:        kind,
   363  					BuildConfig: config,
   364  				}
   365  				configs[config] = g
   366  				set.Group = append(set.Group, g)
   367  			}
   368  
   369  			var groupCaller *srvpb.PagedCrossReferences_Caller
   370  			for _, c := range g.Caller {
   371  				if c.SemanticCaller == ticket {
   372  					groupCaller = c
   373  					break
   374  				}
   375  			}
   376  			if groupCaller == nil {
   377  				groupCaller = &srvpb.PagedCrossReferences_Caller{
   378  					Caller:         caller.Location,
   379  					SemanticCaller: ticket,
   380  					MarkedSource:   caller.MarkedSource,
   381  				}
   382  				g.Caller = append(g.Caller, groupCaller)
   383  			}
   384  			groupCaller.Callsite = append(groupCaller.Callsite, site.Location)
   385  		}
   386  	}
   387  
   388  	sort.Slice(set.Group, func(i, j int) bool {
   389  		return compare.Strings(set.Group[i].BuildConfig, set.Group[j].BuildConfig).
   390  			AndThen(set.Group[i].Kind, set.Group[j].Kind) == compare.LT
   391  	})
   392  	for _, g := range set.Group {
   393  		sort.Slice(g.Anchor, func(i, j int) bool { return g.Anchor[i].Ticket < g.Anchor[j].Ticket })
   394  		for _, caller := range g.Caller {
   395  			sort.Slice(caller.Callsite, func(i, j int) bool { return caller.Callsite[i].Ticket < caller.Callsite[j].Ticket })
   396  		}
   397  	}
   398  
   399  	emitSet("xrefs:"+set.SourceTicket, set)
   400  }
   401  
   402  func keyRef(r *ppb.Reference) (*spb.VName, *ppb.Reference) {
   403  	return r.Source, &ppb.Reference{
   404  		Kind:   r.Kind,
   405  		Anchor: r.Anchor,
   406  	}
   407  }
   408  
   409  func keyCrossRef(xr *xspb.CrossReferences) (*spb.VName, *xspb.CrossReferences) {
   410  	return xr.Source, &xspb.CrossReferences{Entry: xr.Entry}
   411  }
   412  
   413  func (k *KytheBeam) decorationPieces(s beam.Scope) beam.PCollection {
   414  	decor := beam.ParDo(s, refToDecorPiece, k.References())
   415  
   416  	targets := beam.ParDo(s, targetToFile, decor)
   417  	bareNodes := beam.ParDo(s, &nodes.Filter{IncludeEdges: []string{}}, k.nodes)
   418  
   419  	files := beam.ParDo(s, fileToDecorPiece, k.getFiles())
   420  	targetNodes := beam.ParDo(s, nodeToDecorPiece,
   421  		beam.CoGroupByKey(s, beam.ParDo(s, moveSourceToKey, bareNodes), targets))
   422  	defs := beam.ParDo(s, defToDecorPiece,
   423  		beam.CoGroupByKey(s, k.directDefinitions(), targets))
   424  	overrides := k.overrides(targets)
   425  	decorDiagnostics := k.diagnostics()
   426  
   427  	return beam.Flatten(s, decor, files, targetNodes, defs, decorDiagnostics, overrides)
   428  }
   429  
   430  func (k *KytheBeam) overrides(targets beam.PCollection) beam.PCollection {
   431  	s := k.s.Scope("Overrides")
   432  	overriddenToEdge := beam.Seq(s, k.Nodes(), &nodes.Filter{IncludeEdges: []string{edges.Overrides, edges.Extends, edges.OverridesTransitive, edges.Satisfies}}, nodeToEdges)
   433  	overridingToDecor := beam.ParDo(s, overriddenToDecor, beam.CoGroupByKey(s, k.directDefinitions(), overriddenToEdge))
   434  	return beam.ParDo(s, overridingToFile, beam.CoGroupByKey(s, targets, overridingToDecor))
   435  }
   436  
   437  func overriddenToDecor(overridden *spb.VName, overriddenAnchors func(**srvpb.ExpandedAnchor) bool, edgeStream func(**scpb.Edge) bool, emit func(*spb.VName, *ppb.DecorationPiece)) {
   438  	var overriddenAnchor *srvpb.ExpandedAnchor
   439  	var e *scpb.Edge
   440  	if !overriddenAnchors(&overriddenAnchor) {
   441  		return
   442  	}
   443  	for edgeStream(&e) {
   444  		var kind srvpb.FileDecorations_Override_Kind
   445  		edgeKindString := schema.EdgeKindString(e.GetKytheKind())
   446  		if edges.IsVariant(edgeKindString, edges.Overrides) {
   447  			kind = srvpb.FileDecorations_Override_OVERRIDES
   448  		} else if edges.IsVariant(edgeKindString, edges.Extends) || edges.IsVariant(edgeKindString, edges.Satisfies) {
   449  			kind = srvpb.FileDecorations_Override_EXTENDS
   450  		} else {
   451  			continue
   452  		}
   453  		emit(e.Source, &ppb.DecorationPiece{
   454  			Piece: &ppb.DecorationPiece_TargetOverride{
   455  				TargetOverride: &xspb.FileDecorations_TargetOverride{
   456  					Overriding:           e.Source,
   457  					Overridden:           e.Target,
   458  					Kind:                 kind,
   459  					OverridingDefinition: overriddenAnchor,
   460  				},
   461  			},
   462  		})
   463  	}
   464  }
   465  
   466  func overridingToFile(target *spb.VName, files func(**spb.VName) bool, overrides func(**ppb.DecorationPiece) bool, emit func(*spb.VName, *ppb.DecorationPiece)) {
   467  	var file *spb.VName
   468  	if !files(&file) {
   469  		return
   470  	}
   471  	var override *ppb.DecorationPiece
   472  	for overrides(&override) {
   473  		emit(file, override)
   474  	}
   475  }
   476  
   477  func (k *KytheBeam) diagnostics() beam.PCollection {
   478  	s := k.s.Scope("Diagnostics")
   479  	diagnostics := beam.Seq(s, k.Nodes(), &nodes.Filter{
   480  		FilterByKind: []string{kinds.Diagnostic},
   481  		IncludeFacts: []string{facts.Message, facts.Details, facts.ContextURL},
   482  	}, nodeToDiagnostic)
   483  	refTags := beam.ParDo(s, refToTag, k.References())
   484  	fileTags := beam.Seq(s, k.Nodes(), &nodes.Filter{
   485  		FilterByKind: []string{kinds.File},
   486  		IncludeFacts: []string{},
   487  		IncludeEdges: []string{edges.Tagged},
   488  	}, fileToTags)
   489  	return beam.ParDo(s, diagToDecor, beam.CoGroupByKey(s, diagnostics, refTags, fileTags))
   490  }
   491  
   492  func fileToTags(n *scpb.Node, emit func(*spb.VName, *spb.VName)) {
   493  	for _, e := range n.Edge {
   494  		emit(e.Target, n.Source)
   495  	}
   496  }
   497  
   498  func diagToDecor(src *spb.VName, diagStream func(**cpb.Diagnostic) bool, refTagStream func(**srvpb.ExpandedAnchor) bool, fileTagStream func(**spb.VName) bool, emit func(*spb.VName, *ppb.DecorationPiece)) error {
   499  	var d *cpb.Diagnostic
   500  	if !diagStream(&d) {
   501  		return nil
   502  	}
   503  
   504  	var ref *srvpb.ExpandedAnchor
   505  	for refTagStream(&ref) {
   506  		uri, err := kytheuri.Parse(ref.Ticket)
   507  		if err != nil {
   508  			return err
   509  		}
   510  		file := &spb.VName{
   511  			Corpus: uri.Corpus,
   512  			Root:   uri.Root,
   513  			Path:   uri.Path,
   514  		}
   515  		diagWithSpan := *d
   516  		diagWithSpan.Span = ref.Span
   517  		emit(file, &ppb.DecorationPiece{
   518  			Piece: &ppb.DecorationPiece_Diagnostic{
   519  				Diagnostic: &diagWithSpan,
   520  			},
   521  		})
   522  	}
   523  
   524  	var file *spb.VName
   525  	for fileTagStream(&file) {
   526  		emit(file, &ppb.DecorationPiece{
   527  			Piece: &ppb.DecorationPiece_Diagnostic{Diagnostic: d},
   528  		})
   529  	}
   530  
   531  	return nil
   532  }
   533  
   534  func refToTag(r *ppb.Reference, emit func(*spb.VName, *srvpb.ExpandedAnchor)) {
   535  	if r.GetKytheKind() != scpb.EdgeKind_TAGGED {
   536  		return
   537  	}
   538  	emit(r.Source, r.Anchor)
   539  }
   540  
   541  func nodeToDiagnostic(n *scpb.Node) (*spb.VName, *cpb.Diagnostic) {
   542  	d := &cpb.Diagnostic{}
   543  	for _, f := range n.Fact {
   544  		switch f.GetKytheName() {
   545  		case scpb.FactName_MESSAGE:
   546  			d.Message = string(f.Value)
   547  		case scpb.FactName_DETAILS:
   548  			d.Details = string(f.Value)
   549  		case scpb.FactName_CONTEXT_URL:
   550  			d.ContextUrl = string(f.Value)
   551  		}
   552  	}
   553  	return n.Source, d
   554  }
   555  
   556  // SplitDecorations returns a columnar Kythe file decorations table derived from
   557  // the Kythe input graph.  The beam.PCollection has elements of type
   558  // KV<[]byte, []byte>.
   559  func (k *KytheBeam) SplitDecorations() beam.PCollection {
   560  	s := k.s.Scope("SplitDecorations")
   561  	return beam.ParDo(s, encodeDecorPiece, k.decorationPieces(s))
   562  }
   563  
   564  // Decorations returns a Kythe file decorations table derived from the Kythe
   565  // input graph.  The beam.PCollection has elements of type
   566  // KV<string, *srvpb.FileDecorations>.
   567  func (k *KytheBeam) Decorations() beam.PCollection {
   568  	s := k.s.Scope("Decorations")
   569  	pieces := k.decorationPieces(s)
   570  	return beam.ParDo(s, &ticketKey{"decor:"}, beam.CombinePerKey(s, &combineDecorPieces{}, pieces))
   571  }
   572  
   573  type ticketKey struct{ Prefix string }
   574  
   575  func (t *ticketKey) ProcessElement(key *spb.VName, val beam.T) (string, beam.T) {
   576  	return t.Prefix + kytheuri.ToString(key), val
   577  }
   578  
   579  func targetToFile(file *spb.VName, p *ppb.DecorationPiece) (*spb.VName, *spb.VName, error) {
   580  	return p.GetReference().Source, file, nil
   581  }
   582  
   583  // combineDecorPieces combines *ppb.DecorationPieces into a single *srvpb.FileDecorations.
   584  type combineDecorPieces struct{}
   585  
   586  func (c *combineDecorPieces) CreateAccumulator() *srvpb.FileDecorations {
   587  	return &srvpb.FileDecorations{}
   588  }
   589  
   590  func (c *combineDecorPieces) MergeAccumulators(accum, n *srvpb.FileDecorations) *srvpb.FileDecorations {
   591  	accum.Decoration = append(accum.Decoration, n.Decoration...)
   592  	if accum.File == nil {
   593  		accum.File = n.File
   594  	}
   595  	accum.Target = append(accum.Target, n.Target...)
   596  	accum.TargetDefinitions = append(accum.TargetDefinitions, n.TargetDefinitions...)
   597  	accum.Diagnostic = append(accum.Diagnostic, n.Diagnostic...)
   598  	return accum
   599  }
   600  
   601  func (c *combineDecorPieces) AddInput(accum *srvpb.FileDecorations, p *ppb.DecorationPiece) *srvpb.FileDecorations {
   602  	switch p := p.Piece.(type) {
   603  	case *ppb.DecorationPiece_Reference:
   604  		ref := p.Reference
   605  		accum.Decoration = append(accum.Decoration, &srvpb.FileDecorations_Decoration{
   606  			Anchor: &srvpb.RawAnchor{
   607  				StartOffset: ref.Anchor.Span.Start.ByteOffset,
   608  				EndOffset:   ref.Anchor.Span.End.ByteOffset,
   609  
   610  				BuildConfiguration: ref.Anchor.BuildConfiguration,
   611  			},
   612  			Kind:   refKind(ref),
   613  			Target: kytheuri.ToString(ref.Source),
   614  		})
   615  	case *ppb.DecorationPiece_File:
   616  		accum.File = p.File
   617  	case *ppb.DecorationPiece_Node:
   618  		accum.Target = append(accum.Target, convertPipelineNode(p.Node))
   619  	case *ppb.DecorationPiece_Definition_:
   620  		// TODO(schroederc): redesign *srvpb.FileDecorations to not need invasive
   621  		// changes to add a node's definition
   622  		def := p.Definition
   623  		accum.TargetDefinitions = append(accum.TargetDefinitions, def.Definition)
   624  		// Add a marker to associate the definition and node.  ExtractOutput will
   625  		// later embed the definition within accum.Target/accum.TargetOverride.
   626  		accum.Target = append(accum.Target, &srvpb.Node{
   627  			Ticket:             kytheuri.ToString(def.Node),
   628  			DefinitionLocation: &srvpb.ExpandedAnchor{Ticket: def.Definition.Ticket},
   629  		})
   630  	case *ppb.DecorationPiece_Diagnostic:
   631  		accum.Diagnostic = append(accum.Diagnostic, p.Diagnostic)
   632  	case *ppb.DecorationPiece_TargetOverride:
   633  		accum.TargetOverride = append(accum.TargetOverride, &srvpb.FileDecorations_Override{
   634  			Overriding:           kytheuri.ToString(p.TargetOverride.Overriding),
   635  			Overridden:           kytheuri.ToString(p.TargetOverride.Overridden),
   636  			OverriddenDefinition: p.TargetOverride.OverridingDefinition.Ticket,
   637  			Kind:                 p.TargetOverride.Kind,
   638  		})
   639  	default:
   640  		panic(fmt.Errorf("unhandled DecorationPiece: %T", p))
   641  	}
   642  	return accum
   643  }
   644  
   645  func convertPipelineNode(node *scpb.Node) *srvpb.Node {
   646  	n := &srvpb.Node{Ticket: kytheuri.ToString(node.Source)}
   647  	if kind := schema.GetNodeKind(node); kind != "" {
   648  		n.Fact = append(n.Fact, &cpb.Fact{
   649  			Name:  facts.NodeKind,
   650  			Value: []byte(kind),
   651  		})
   652  	}
   653  	if subkind := schema.GetSubkind(node); subkind != "" {
   654  		n.Fact = append(n.Fact, &cpb.Fact{
   655  			Name:  facts.Subkind,
   656  			Value: []byte(subkind),
   657  		})
   658  	}
   659  	for _, f := range node.Fact {
   660  		n.Fact = append(n.Fact, &cpb.Fact{
   661  			Name:  schema.GetFactName(f),
   662  			Value: f.Value,
   663  		})
   664  	}
   665  	sort.Slice(n.Fact, func(i, j int) bool { return n.Fact[i].Name < n.Fact[j].Name })
   666  	return n
   667  }
   668  
   669  func (c *combineDecorPieces) ExtractOutput(fd *srvpb.FileDecorations) *srvpb.FileDecorations {
   670  	// Embed definitions for Decorations and Overrides
   671  	for i := len(fd.Target) - 1; i >= 0; i-- {
   672  		if fd.Target[i].DefinitionLocation == nil {
   673  			continue
   674  		}
   675  		node, def := fd.Target[i].Ticket, fd.Target[i].DefinitionLocation.Ticket
   676  		fd.Target = append(fd.Target[:i], fd.Target[i+1:]...)
   677  
   678  		for _, d := range fd.Decoration {
   679  			if d.Target == node {
   680  				d.TargetDefinition = def
   681  			}
   682  		}
   683  		for _, o := range fd.TargetOverride {
   684  			if o.Overridden == node {
   685  				o.OverriddenDefinition = def
   686  			}
   687  		}
   688  	}
   689  
   690  	sort.Slice(fd.Decoration, func(i, j int) bool {
   691  		if c := compare.Ints(int(fd.Decoration[i].Anchor.StartOffset), int(fd.Decoration[j].Anchor.StartOffset)); c != compare.EQ {
   692  			return c == compare.LT
   693  		} else if c := compare.Ints(int(fd.Decoration[i].Anchor.EndOffset), int(fd.Decoration[j].Anchor.EndOffset)); c != compare.EQ {
   694  			return c == compare.LT
   695  		} else if c := compare.Strings(fd.Decoration[i].Kind, fd.Decoration[j].Kind); c != compare.EQ {
   696  			return c == compare.LT
   697  		}
   698  		return fd.Decoration[i].Target < fd.Decoration[j].Target
   699  	})
   700  	sort.Slice(fd.Target, func(i, j int) bool { return fd.Target[i].Ticket < fd.Target[j].Ticket })
   701  	sort.Slice(fd.TargetDefinitions, func(i, j int) bool { return fd.TargetDefinitions[i].Ticket < fd.TargetDefinitions[j].Ticket })
   702  
   703  	sort.Slice(fd.Diagnostic, func(i, j int) bool {
   704  		a, b := fd.Diagnostic[i], fd.Diagnostic[j]
   705  		return compare.Compare(a.Span.GetStart().GetByteOffset(), b.Span.GetStart().GetByteOffset()).
   706  			AndThen(a.Span.GetEnd().GetByteOffset(), b.Span.GetEnd().GetByteOffset()).
   707  			AndThen(a.Message, b.Message) == compare.LT
   708  	})
   709  	return fd
   710  }
   711  
   712  func fileToDecorPiece(src *spb.VName, f *srvpb.File) (*spb.VName, *ppb.DecorationPiece) {
   713  	return src, &ppb.DecorationPiece{Piece: &ppb.DecorationPiece_File{f}}
   714  }
   715  
   716  func refToDecorPiece(r *ppb.Reference, emit func(*spb.VName, *ppb.DecorationPiece)) error {
   717  	if r.GetKytheKind() == scpb.EdgeKind_TAGGED {
   718  		return nil
   719  	}
   720  	p := &ppb.DecorationPiece{
   721  		Piece: &ppb.DecorationPiece_Reference{&ppb.Reference{
   722  			Source: r.Source,
   723  			Kind:   r.Kind,
   724  			Anchor: r.Anchor,
   725  		}},
   726  	}
   727  	file, err := anchorToFileVName(r.Anchor.Ticket)
   728  	if err != nil {
   729  		return err
   730  	}
   731  	emit(file, p)
   732  	return nil
   733  }
   734  
   735  func anchorToFileVName(anchorTicket string) (*spb.VName, error) {
   736  	anchor, err := kytheuri.ToVName(anchorTicket)
   737  	if err != nil {
   738  		return nil, err
   739  	}
   740  	return fileVName(anchor), nil
   741  }
   742  
   743  func fileVName(anchor *spb.VName) *spb.VName {
   744  	return &spb.VName{
   745  		Corpus: anchor.Corpus,
   746  		Root:   anchor.Root,
   747  		Path:   anchor.Path,
   748  	}
   749  }
   750  
   751  func nodeToDecorPiece(key *spb.VName, node func(**scpb.Node) bool, file func(**spb.VName) bool, emit func(*spb.VName, *ppb.DecorationPiece)) {
   752  	var n, singleNode *scpb.Node
   753  	for node(&n) {
   754  		singleNode = n
   755  	}
   756  	if singleNode == nil {
   757  		return
   758  	}
   759  
   760  	piece := &ppb.DecorationPiece{
   761  		Piece: &ppb.DecorationPiece_Node{&scpb.Node{
   762  			Source:  key,
   763  			Kind:    singleNode.Kind,
   764  			Subkind: singleNode.Subkind,
   765  			Fact:    singleNode.Fact,
   766  			Edge:    singleNode.Edge,
   767  		}},
   768  	}
   769  
   770  	var f *spb.VName
   771  	for file(&f) {
   772  		emit(f, piece)
   773  	}
   774  }
   775  
   776  func defToDecorPiece(node *spb.VName, defs func(**srvpb.ExpandedAnchor) bool, file func(**spb.VName) bool, emit func(*spb.VName, *ppb.DecorationPiece)) {
   777  	var def *srvpb.ExpandedAnchor
   778  	for defs(&def) {
   779  		// TODO(schroederc): select ambiguous definition better
   780  		break // pick first known definition
   781  	}
   782  	if def == nil {
   783  		return
   784  	}
   785  	piece := &ppb.DecorationPiece{
   786  		Piece: &ppb.DecorationPiece_Definition_{&ppb.DecorationPiece_Definition{
   787  			Node:       node,
   788  			Definition: def,
   789  		}},
   790  	}
   791  	var f *spb.VName
   792  	for file(&f) {
   793  		emit(f, piece)
   794  	}
   795  }
   796  
   797  // Nodes returns all *scpb.Nodes from the Kythe input graph.
   798  func (k *KytheBeam) Nodes() beam.PCollection { return k.nodes }
   799  
   800  // References returns all derived *ppb.References from the Kythe input graph.
   801  func (k *KytheBeam) References() beam.PCollection {
   802  	if k.refs.IsValid() {
   803  		return k.refs
   804  	}
   805  	s := k.s.Scope("References")
   806  	anchors := beam.ParDo(s, keyByPath, beam.ParDo(s,
   807  		&nodes.Filter{
   808  			FilterByKind: []string{kinds.Anchor},
   809  			IncludeFacts: []string{
   810  				facts.AnchorStart, facts.AnchorEnd,
   811  				facts.SnippetStart, facts.SnippetEnd,
   812  				facts.BuildConfig,
   813  			},
   814  		}, k.nodes))
   815  	k.refs = beam.ParDo(s, toRefs, beam.CoGroupByKey(s, k.getFiles(), anchors))
   816  	return k.refs
   817  }
   818  
   819  func (k *KytheBeam) getFiles() beam.PCollection {
   820  	if !k.files.IsValid() {
   821  		fileNodes := beam.ParDo(k.s,
   822  			&nodes.Filter{
   823  				FilterByKind: []string{kinds.File},
   824  				IncludeFacts: []string{facts.Text, facts.TextEncoding},
   825  			}, k.nodes)
   826  		k.files = beam.ParDo(k.s, toFiles, fileNodes)
   827  	}
   828  	return k.files
   829  }
   830  
   831  func keyByPath(n *scpb.Node) (*spb.VName, *scpb.Node) {
   832  	return &spb.VName{Corpus: n.Source.Corpus, Root: n.Source.Root, Path: n.Source.Path}, n
   833  }
   834  
   835  func toRefs(p *spb.VName, file func(**srvpb.File) bool, anchor func(**scpb.Node) bool, emit func(*ppb.Reference)) error {
   836  	var f *srvpb.File
   837  	if !file(&f) {
   838  		return nil
   839  	}
   840  	return normalizeAnchors(f, anchor, emit)
   841  }
   842  
   843  func toFiles(n *scpb.Node) (*spb.VName, *srvpb.File) {
   844  	var f srvpb.File
   845  	for _, fact := range n.Fact {
   846  		switch fact.GetKytheName() {
   847  		case scpb.FactName_TEXT:
   848  			f.Text = fact.Value
   849  		case scpb.FactName_TEXT_ENCODING:
   850  			f.Encoding = string(fact.Value)
   851  		}
   852  	}
   853  	return n.Source, &f
   854  }
   855  
   856  func normalizeAnchors(file *srvpb.File, anchor func(**scpb.Node) bool, emit func(*ppb.Reference)) error {
   857  	norm := span.NewNormalizer(file.Text)
   858  	var n *scpb.Node
   859  	for anchor(&n) {
   860  		raw, err := toRawAnchor(n)
   861  		if err != nil {
   862  			return err
   863  		}
   864  		a, err := assemble.ExpandAnchor(raw, file, norm, "")
   865  		if err != nil {
   866  			log.Errorf("expanding anchor {%+v}: %v", raw, err)
   867  			break
   868  		}
   869  
   870  		var parent *spb.VName
   871  		for _, e := range n.Edge {
   872  			if e.GetKytheKind() == scpb.EdgeKind_CHILD_OF {
   873  				// There should only be a single parent for each anchor.
   874  				parent = e.Target
   875  				break
   876  			}
   877  		}
   878  
   879  		for _, e := range n.Edge {
   880  			if e.GetKytheKind() == scpb.EdgeKind_CHILD_OF {
   881  				continue
   882  			}
   883  			ref := &ppb.Reference{
   884  				Source: e.Target,
   885  				Anchor: a,
   886  				Scope:  parent,
   887  			}
   888  			if k := e.GetKytheKind(); k == scpb.EdgeKind_UNKNOWN_EDGE_KIND {
   889  				ref.Kind = &ppb.Reference_GenericKind{e.GetGenericKind()}
   890  			} else {
   891  				ref.Kind = &ppb.Reference_KytheKind{k}
   892  			}
   893  			emit(ref)
   894  		}
   895  	}
   896  	return nil
   897  }
   898  
   899  func toRawAnchor(n *scpb.Node) (*srvpb.RawAnchor, error) {
   900  	var a srvpb.RawAnchor
   901  	for _, f := range n.Fact {
   902  		var err error
   903  		switch f.GetKytheName() {
   904  		case scpb.FactName_BUILD_CONFIG:
   905  			a.BuildConfiguration = string(f.Value)
   906  		case scpb.FactName_LOC_START:
   907  			a.StartOffset, err = factValueToInt(f)
   908  		case scpb.FactName_LOC_END:
   909  			a.EndOffset, err = factValueToInt(f)
   910  		case scpb.FactName_SNIPPET_START:
   911  			a.SnippetStart, err = factValueToInt(f)
   912  		case scpb.FactName_SNIPPET_END:
   913  			a.SnippetEnd, err = factValueToInt(f)
   914  		default:
   915  			return nil, fmt.Errorf("unhandled fact: %v", f)
   916  		}
   917  		if err != nil {
   918  			return nil, err
   919  		}
   920  	}
   921  	a.Ticket = kytheuri.ToString(n.Source)
   922  	return &a, nil
   923  }
   924  
   925  func factValueToInt(f *scpb.Fact) (int32, error) {
   926  	i, err := strconv.Atoi(string(f.Value))
   927  	if err != nil {
   928  		return 0, fmt.Errorf("invalid integer fact value for %q: %v", schema.GetFactName(f), err)
   929  	}
   930  	return int32(i), nil
   931  }
   932  
   933  func moveSourceToKey(n *scpb.Node) (*spb.VName, *scpb.Node) {
   934  	return n.Source, &scpb.Node{
   935  		Kind:    n.Kind,
   936  		Subkind: n.Subkind,
   937  		Fact:    n.Fact,
   938  		Edge:    n.Edge,
   939  	}
   940  }
   941  
   942  func (k *KytheBeam) directDefinitions() beam.PCollection {
   943  	s := k.s.Scope("DirectDefinitions")
   944  	return beam.ParDo(s, toDefinition, k.References())
   945  }
   946  
   947  func toDefinition(r *ppb.Reference, emit func(*spb.VName, *srvpb.ExpandedAnchor)) error {
   948  	if edges.IsVariant(refKind(r), edges.Defines) {
   949  		emit(r.Source, r.Anchor)
   950  	}
   951  	return nil
   952  }
   953  
   954  func refKind(r *ppb.Reference) string {
   955  	if k := r.GetKytheKind(); k != scpb.EdgeKind_UNKNOWN_EDGE_KIND {
   956  		return schema.EdgeKindString(k)
   957  	}
   958  	return r.GetGenericKind()
   959  }
   960  
   961  // Edges returns a Kythe edges table derived from the Kythe input graph.  The beam.PCollections have
   962  // elements of type KV<string, *srvpb.PagedEdgeSet> and KV<string, *srvpb.EdgePage>, respectively.
   963  func (k *KytheBeam) Edges() (beam.PCollection, beam.PCollection) {
   964  	s := k.s.Scope("Edges")
   965  
   966  	nodes := beam.ParDo(s, moveSourceToKey, k.nodes)
   967  	edges := beam.ParDo(s, reverseEdge, beam.CoGroupByKey(s, nodes, beam.ParDo(s, nodeToEdges, k.nodes)))
   968  	rev := beam.ParDo(s, nodeToReverseEdges, k.nodes)
   969  
   970  	return beam.ParDo2(s, groupEdges, beam.CoGroupByKey(s, nodes, edges, rev))
   971  }
   972  
   973  // edgeRelations returns a beam.PCollection of gspb.Edges for all Kythe graph
   974  // relations.
   975  func (k *KytheBeam) edgeRelations() beam.PCollection {
   976  	if !k.edges.IsValid() {
   977  		s := k.s.Scope("Relations")
   978  
   979  		nodeEdges := beam.Seq(s, k.nodes, filterAnchorNodes, &nodes.Filter{IncludeFacts: []string{}})
   980  		sourceNodes := beam.ParDo(s, moveSourceToKey, k.nodes)
   981  
   982  		targetNodes := beam.ParDo(s, encodeEdgeTarget, beam.CoGroupByKey(s,
   983  			sourceNodes,
   984  			beam.ParDo(s, splitEdge, filter.Distinct(s, beam.ParDo(s, edgeTargets, nodeEdges)))))
   985  		edges := beam.ParDo(s, encodeEdges, nodeEdges)
   986  
   987  		k.edges = beam.Flatten(s, edges, targetNodes)
   988  	}
   989  	return k.edges
   990  }
   991  
   992  // SplitEdges returns a columnar Kythe edges table derived from the Kythe input
   993  // graph.  The beam.PCollection have elements of type KV<[]byte, []byte>.
   994  func (k *KytheBeam) SplitEdges() beam.PCollection {
   995  	s := k.s.Scope("SplitEdges")
   996  
   997  	idx := beam.ParDo(s, combineEdgesIndex,
   998  		// TODO(schroederc): counts; also needed for presence with only rev edges
   999  		beam.ParDo(s, keyNode, beam.ParDo(s, &nodes.Filter{IncludeEdges: []string{}}, k.Nodes())))
  1000  
  1001  	return beam.ParDo(s, encodeEdgesEntry, beam.Flatten(s, idx, k.edgeRelations()))
  1002  }
  1003  
  1004  func filterAnchorNodes(n *scpb.Node, emit func(*scpb.Node)) {
  1005  	if n.GetKytheKind() == scpb.NodeKind_ANCHOR {
  1006  		return
  1007  	}
  1008  	emit(n)
  1009  }
  1010  
  1011  func edgeTargets(n *scpb.Node, emit func(*scpb.Edge)) {
  1012  	for _, e := range n.Edge {
  1013  		emit(&scpb.Edge{Source: n.Source, Target: e.Target})
  1014  		emit(&scpb.Edge{Target: n.Source, Source: e.Target})
  1015  	}
  1016  }
  1017  
  1018  func splitEdge(e *scpb.Edge) (*spb.VName, *spb.VName) { return e.Source, e.Target }
  1019  
  1020  func combineEdgesIndex(src *spb.VName, node *scpb.Node) *gspb.Edges {
  1021  	return &gspb.Edges{
  1022  		Source: src,
  1023  		Entry: &gspb.Edges_Index_{&gspb.Edges_Index{
  1024  			Node: node,
  1025  		}},
  1026  	}
  1027  }
  1028  
  1029  // nodeToReverseEdges emits an *scpb.Edge with its SourceNode populated for each of n's edges.  The
  1030  // key for each *scpb.Edge is its Target VName.
  1031  func nodeToReverseEdges(n *scpb.Node, emit func(*spb.VName, *scpb.Edge)) {
  1032  	node := nodeWithoutEdges(n)
  1033  	for _, e := range n.Edge {
  1034  		emit(e.Target, &scpb.Edge{
  1035  			SourceNode: node,
  1036  			Target:     e.Target,
  1037  			Kind:       e.Kind,
  1038  			Ordinal:    e.Ordinal,
  1039  		})
  1040  	}
  1041  }
  1042  
  1043  // nodeToEdges emits an *scpb.Edge for each of n's edges.  The key for each *scpb.Edge is its Target
  1044  // VName.
  1045  func nodeToEdges(n *scpb.Node, emit func(*spb.VName, *scpb.Edge)) {
  1046  	for _, e := range n.Edge {
  1047  		emit(e.Target, &scpb.Edge{
  1048  			Source:  n.Source,
  1049  			Target:  e.Target,
  1050  			Kind:    e.Kind,
  1051  			Ordinal: e.Ordinal,
  1052  		})
  1053  	}
  1054  }
  1055  
  1056  func nodeWithoutEdges(n *scpb.Node) *scpb.Node {
  1057  	return &scpb.Node{
  1058  		Source:  n.Source,
  1059  		Kind:    n.Kind,
  1060  		Subkind: n.Subkind,
  1061  		Fact:    n.Fact,
  1062  	}
  1063  }
  1064  
  1065  // reverseEdge emits the reverse of each *scpb.Edge, embedding the associated TargetNode.
  1066  func reverseEdge(src *spb.VName, nodeStream func(**scpb.Node) bool, edgeStream func(**scpb.Edge) bool, emit func(*spb.VName, *scpb.Edge)) {
  1067  	var node *scpb.Node
  1068  	if !nodeStream(&node) {
  1069  		node = &scpb.Node{}
  1070  	} else {
  1071  		node = nodeWithoutEdges(node)
  1072  	}
  1073  	node.Source = src
  1074  
  1075  	var e *scpb.Edge
  1076  	for edgeStream(&e) {
  1077  		emit(e.Source, &scpb.Edge{
  1078  			Source:     e.Source,
  1079  			TargetNode: node,
  1080  			Kind:       e.Kind,
  1081  			Ordinal:    e.Ordinal,
  1082  		})
  1083  	}
  1084  }
  1085  
  1086  // groupEdges emits *srvpb.PagedEdgeSets and *srvpb.EdgePages for a node and its forward/reverse
  1087  // edges.
  1088  func groupEdges(src *spb.VName, nodeStream func(**scpb.Node) bool, edgeStream, revStream func(**scpb.Edge) bool, emitSet func(string, *srvpb.PagedEdgeSet), emitPage func(string, *srvpb.EdgePage)) {
  1089  	set := &srvpb.PagedEdgeSet{}
  1090  	// TODO(schroederc): paging
  1091  
  1092  	var node *scpb.Node
  1093  	if nodeStream(&node) {
  1094  		node.Source = src
  1095  		set.Source = convertPipelineNode(node)
  1096  	} else {
  1097  		set.Source = &srvpb.Node{Ticket: kytheuri.ToString(src)}
  1098  	}
  1099  
  1100  	groups := make(map[string]*srvpb.EdgeGroup)
  1101  
  1102  	var edge *scpb.Edge
  1103  	for edgeStream(&edge) {
  1104  		kind := schema.GetEdgeKind(edge)
  1105  		g, ok := groups[kind]
  1106  		if !ok {
  1107  			g = &srvpb.EdgeGroup{Kind: kind}
  1108  			groups[kind] = g
  1109  			set.Group = append(set.Group, g)
  1110  		}
  1111  		g.Edge = append(g.Edge, &srvpb.EdgeGroup_Edge{
  1112  			Target:  convertPipelineNode(edge.TargetNode),
  1113  			Ordinal: edge.Ordinal,
  1114  		})
  1115  	}
  1116  	for revStream(&edge) {
  1117  		kind := "%" + schema.GetEdgeKind(edge) // encode reverse edge kind
  1118  		g, ok := groups[kind]
  1119  		if !ok {
  1120  			g = &srvpb.EdgeGroup{Kind: kind}
  1121  			groups[kind] = g
  1122  			set.Group = append(set.Group, g)
  1123  		}
  1124  		g.Edge = append(g.Edge, &srvpb.EdgeGroup_Edge{
  1125  			Target:  convertPipelineNode(edge.SourceNode),
  1126  			Ordinal: edge.Ordinal,
  1127  		})
  1128  	}
  1129  
  1130  	sort.Slice(set.Group, func(i, j int) bool { return set.Group[i].Kind < set.Group[j].Kind })
  1131  	for _, g := range set.Group {
  1132  		sort.Slice(g.Edge, func(i, j int) bool {
  1133  			return compare.Compare(g.Edge[i].Ordinal, g.Edge[j].Ordinal).
  1134  				AndThen(g.Edge[i].Target.Ticket, g.Edge[j].Target.Ticket) == compare.LT
  1135  		})
  1136  	}
  1137  
  1138  	emitSet("edgeSets:"+set.Source.Ticket, set)
  1139  }
  1140  
  1141  func (k *KytheBeam) getMarkedSources() beam.PCollection {
  1142  	if !k.markedSources.IsValid() {
  1143  		s := k.s.Scope("MarkedSources")
  1144  		k.markedSources = beam.Seq(s, k.nodes, &nodes.Filter{
  1145  			IncludeFacts: []string{facts.Code},
  1146  			IncludeEdges: []string{},
  1147  		}, parseMarkedSource)
  1148  	}
  1149  	return k.markedSources
  1150  }
  1151  
  1152  // Documents returns a Kythe documentation table derived from the Kythe input
  1153  // graph.  The beam.PCollection has elements of type KV<string,
  1154  // *srvpb.Document>.
  1155  func (k *KytheBeam) Documents() beam.PCollection {
  1156  	s := k.s.Scope("Documents")
  1157  
  1158  	docs := beam.Seq(s, k.nodes, &nodes.Filter{
  1159  		FilterByKind: []string{kinds.Doc},
  1160  		IncludeFacts: []string{facts.Text},
  1161  		IncludeEdges: []string{edges.Documents},
  1162  	}, nodeToDocs)
  1163  	markedSources := k.getMarkedSources()
  1164  	children := beam.Seq(s, k.nodes, &nodes.Filter{
  1165  		IncludeFacts: []string{},
  1166  		IncludeEdges: []string{edges.ChildOf},
  1167  	}, nodeToChildren)
  1168  
  1169  	return beam.ParDo(s, completeDocument, beam.CoGroupByKey(s, docs, markedSources, children))
  1170  }
  1171  
  1172  // completeDocument emits a single *srvpb.Document per *spb.VName source.
  1173  func completeDocument(key *spb.VName, docStream func(**srvpb.Document) bool, msStream func(**cpb.MarkedSource) bool, childStream func(**spb.VName) bool, emit func(string, *srvpb.Document)) {
  1174  	var doc *srvpb.Document
  1175  	if !docStream(&doc) {
  1176  		return
  1177  	}
  1178  	doc.Ticket = kytheuri.ToString(key)
  1179  
  1180  	msStream(&doc.MarkedSource) // embed MarkedSource, if available
  1181  
  1182  	var child *spb.VName
  1183  	for childStream(&child) {
  1184  		doc.ChildTicket = append(doc.ChildTicket, kytheuri.ToString(child))
  1185  	}
  1186  	sort.Strings(doc.ChildTicket)
  1187  
  1188  	// TODO(schroederc): add definition Links
  1189  	emit("docs:"+doc.Ticket, doc)
  1190  }
  1191  
  1192  // nodeToDocs emits a (*spb.VName, *srvpb.Document) pair for each
  1193  // /kythe/edge/documents edges from the given `doc` *scpb.Node.
  1194  func nodeToDocs(n *scpb.Node, emit func(*spb.VName, *srvpb.Document)) {
  1195  	d := &srvpb.Document{}
  1196  	for _, f := range n.Fact {
  1197  		if f.GetKytheName() == scpb.FactName_TEXT {
  1198  			d.RawText = string(f.Value)
  1199  			break
  1200  		}
  1201  	}
  1202  
  1203  	for _, e := range n.Edge {
  1204  		if e.GetKytheKind() == scpb.EdgeKind_DOCUMENTS {
  1205  			emit(e.Target, d)
  1206  		}
  1207  	}
  1208  }
  1209  
  1210  // parseMarkedSource parses the /kythe/code fact for each *scpb.Node.
  1211  func parseMarkedSource(n *scpb.Node, emit func(*spb.VName, *cpb.MarkedSource)) error {
  1212  	for _, f := range n.Fact {
  1213  		if f.GetKytheName() == scpb.FactName_CODE {
  1214  			var ms cpb.MarkedSource
  1215  			if err := proto.Unmarshal(f.Value, &ms); err != nil {
  1216  				return err
  1217  			}
  1218  			emit(n.Source, &ms)
  1219  			break
  1220  		}
  1221  	}
  1222  	return nil
  1223  }
  1224  
  1225  // nodeToChildren emits a (parent, child) pair for each /kythe/edge/childof edge
  1226  // per *scpb.Node.
  1227  func nodeToChildren(n *scpb.Node, emit func(*spb.VName, *spb.VName)) {
  1228  	for _, e := range n.Edge {
  1229  		if e.GetKytheKind() == scpb.EdgeKind_CHILD_OF {
  1230  			emit(e.Target, n.Source) // parent -> child
  1231  		}
  1232  	}
  1233  }