kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/storage/entryset/entryset.go (about)

     1  /*
     2   * Copyright 2017 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  // Package entryset implements a compact representation for sets of Kythe entry
    18  // messages in the format emitted by indexers.
    19  //
    20  // Call New to construct an empty set, and use the Add method to add entries:
    21  //
    22  //	set := entryset.New(nil)
    23  //	for entry := range readEntries() {
    24  //	   if err := set.Add(entry); err != nil {
    25  //	      log.Exitf("Invalid entry: %v", err)
    26  //	   }
    27  //	}
    28  //
    29  // Entries are automatically deduplicated. You can traverse the contents of an
    30  // entry set with the Visit method, which takes a callback:
    31  //
    32  //	set.Visit(func(e *storagepb.Entry) bool {
    33  //	   process(e)
    34  //	   return wantMore
    35  //	})
    36  //
    37  // An entry set may or may not be "canonical": A canonical entry set has the
    38  // property that calling its Visit method will deliver all the entries in the
    39  // canonical entry order (http://www.kythe.io/docs/kythe-storage.html).  A
    40  // newly-created entry set is canonical; a call to Add may invalidate this
    41  // status. Call the Canonicalize method to canonicalize an entryset.
    42  //
    43  //	set := entryset.New(nil) // set is canonical
    44  //	set.Add(e)               // set is no longer canonical
    45  //	set.Canonicalize()       // set is (once again) canonical
    46  //
    47  // An entryset can be converted into a kythe.storage.EntrySet protobuf message
    48  // using the Encode method. This message is defined in entryset.proto. You can
    49  // construct a Set from an EntrySet message using Decode:
    50  //
    51  //	pb := old.Encode()
    52  //	new, err := entryset.Decode(pb)
    53  //	if err != nil {
    54  //	  log.Exitf("Invalid entryset message: %v", err)
    55  //	}
    56  //
    57  // When rendered in wire format, the protobuf encoding is considerably more
    58  // compact than a naive entry stream.
    59  package entryset // import "kythe.io/kythe/go/storage/entryset"
    60  
    61  import (
    62  	"fmt"
    63  	"io"
    64  	"sort"
    65  
    66  	"kythe.io/kythe/go/util/kytheuri"
    67  	"kythe.io/kythe/go/util/schema/edges"
    68  
    69  	"github.com/pkg/errors"
    70  	"google.golang.org/protobuf/proto"
    71  
    72  	espb "kythe.io/kythe/proto/entryset_go_proto"
    73  	intpb "kythe.io/kythe/proto/internal_go_proto"
    74  	spb "kythe.io/kythe/proto/storage_go_proto"
    75  )
    76  
    77  // A Set represents a set of unique entries.
    78  type Set struct {
    79  	syms []string // lazily-initialized symbol lookup
    80  	nids []node   // lazily-initialized node lookup
    81  
    82  	symid map[string]id
    83  	nodes map[node]nid
    84  	facts map[nid]map[fact]struct{}
    85  	edges map[nid]map[edge]struct{}
    86  	canon bool // true if this set is canonicalized
    87  	opts  *Options
    88  
    89  	addCalls  int
    90  	addErrors int
    91  }
    92  
    93  // Options provide configuration settings for a Set.
    94  // A nil *Options provides sensible default values.
    95  type Options struct {
    96  	// When encoding a set to wire format, any symbol longer than this number
    97  	// of bytes will be split into chunks of at most this length.
    98  	// If ≤ 0, no splitting is performed.
    99  	MaxSymbolBytes int
   100  }
   101  
   102  func (o *Options) maxSymbolBytes() int {
   103  	if o == nil || o.MaxSymbolBytes < 0 {
   104  		return 0
   105  	}
   106  	return o.MaxSymbolBytes
   107  }
   108  
   109  // New constructs a new Set containing no entries.
   110  func New(opts *Options) *Set {
   111  	s := &Set{
   112  		symid: make(map[string]id),
   113  		nodes: make(map[node]nid),
   114  		facts: make(map[nid]map[fact]struct{}),
   115  		edges: make(map[nid]map[edge]struct{}),
   116  		opts:  opts,
   117  	}
   118  	s.enter("") // pre-assign "" as ID 0
   119  	s.canon = true
   120  	return s
   121  }
   122  
   123  // Stats carry statistics about the contents of a Set.
   124  type Stats struct {
   125  	Adds    int // number of times Add was successfully invoked
   126  	Errors  int // number of times Add reported an error
   127  	Nodes   int // count of unique nodes stored
   128  	Facts   int // total number of facts stored
   129  	Edges   int // total number of edges stored
   130  	Symbols int // total count of unique symbols stored
   131  }
   132  
   133  // Stats returns current statistics for s.
   134  func (s *Set) Stats() *Stats {
   135  	stats := &Stats{
   136  		Adds:    s.addCalls,
   137  		Errors:  s.addErrors,
   138  		Nodes:   len(s.nodes),
   139  		Symbols: len(s.symid),
   140  	}
   141  	for _, facts := range s.facts {
   142  		stats.Facts += len(facts)
   143  	}
   144  	for _, edges := range s.edges {
   145  		stats.Edges += len(edges)
   146  	}
   147  	return stats
   148  }
   149  
   150  // Add adds the specified entry to the set, returning an error if the entry is
   151  // structurally invalid. An invalid entry does not corrupt the state of the
   152  // set, such entries are simply discarded. It is therefore safe to ignore an
   153  // error from this method if you want to drop invalid data.
   154  func (s *Set) Add(e *spb.Entry) error {
   155  	if e == nil {
   156  		s.addErrors++
   157  		return errors.New("entryset: nil entry")
   158  	} else if (e.Target == nil) != (e.EdgeKind == "") {
   159  		s.addErrors++
   160  		return fmt.Errorf("entryset: invalid entry: target=%v/kind=%v", e.Target == nil, e.EdgeKind == "")
   161  	}
   162  	s.addCalls++
   163  	src := s.addVName(e.Source)
   164  	if e.Target != nil {
   165  		s.addEdge(src, edge{
   166  			kind:   s.enter(e.EdgeKind),
   167  			target: s.addVName(e.Target),
   168  		})
   169  	} else {
   170  		s.addFact(src, fact{
   171  			name:  s.enter(e.FactName),
   172  			value: s.enter(string(e.FactValue)),
   173  		})
   174  	}
   175  	return nil
   176  }
   177  
   178  // Sources groups the entries of the set into Source messages and invokes f for
   179  // each one. If f returns false the visit is aborted.
   180  func (s *Set) Sources(f func(*intpb.Source) bool) {
   181  	for i := 0; i < len(s.nodes); i++ {
   182  		n := s.node(nid(i))
   183  		nid := nid(i)
   184  		src := &intpb.Source{
   185  			Ticket:     s.ticket(n),
   186  			Facts:      make(map[string][]byte),
   187  			EdgeGroups: make(map[string]*intpb.Source_EdgeGroup),
   188  		}
   189  		for fact := range s.facts[nid] {
   190  			src.Facts[s.symbol(fact.name)] = []byte(s.symbol(fact.value))
   191  		}
   192  		for edge := range s.edges[nid] {
   193  			kind, ordinal, _ := edges.ParseOrdinal(s.symbol(edge.kind))
   194  			eg := src.EdgeGroups[kind]
   195  			if eg == nil {
   196  				eg = new(intpb.Source_EdgeGroup)
   197  				src.EdgeGroups[kind] = eg
   198  			}
   199  			eg.Edges = append(eg.Edges, &intpb.Source_Edge{
   200  				Ticket:  s.ticket(s.node(edge.target)),
   201  				Ordinal: int32(ordinal),
   202  			})
   203  		}
   204  		if !f(src) {
   205  			return // early exit
   206  		}
   207  	}
   208  }
   209  
   210  // Visit calls f with each entry in the set. If s is canonical, entries are
   211  // delivered in canonical order; otherwise the order is unspecified.  If f
   212  // returns false the visit is aborted.
   213  func (s *Set) Visit(f func(*spb.Entry) bool) {
   214  	// Scan nodes in order by nid. If s is canonical, they will also be
   215  	// lexicographically ordered.
   216  	for i := 0; i < len(s.nodes); i++ {
   217  		n := s.node(nid(i))
   218  		src := s.vname(n)
   219  		nid := nid(i)
   220  
   221  		// Deliver facts. If s is canonical, they will be correctly ordered.
   222  		facts := sortedFacts(s.facts[nid])
   223  		for _, fact := range facts {
   224  			if !f(&spb.Entry{
   225  				Source:    src,
   226  				FactName:  s.symbol(fact.name),
   227  				FactValue: []byte(s.symbol(fact.value)),
   228  			}) {
   229  				return // early exit
   230  			}
   231  		}
   232  
   233  		// Deliver edges. If s is canonical, they will be correctly ordered.
   234  		edges := sortedEdges(s.edges[nid])
   235  		for _, edge := range edges {
   236  			tgt := s.vname(s.node(edge.target))
   237  			if !f(&spb.Entry{
   238  				Source:   src,
   239  				Target:   tgt,
   240  				EdgeKind: s.symbol(edge.kind),
   241  			}) {
   242  				return // early exit
   243  			}
   244  		}
   245  	}
   246  }
   247  
   248  // Canonicalize modifies s in-place to be in canonical form, and returns s to
   249  // permit chaining.  If s is already in canonical form, it is returned
   250  // unmodified.
   251  func (s *Set) Canonicalize() *Set {
   252  	if s.canon {
   253  		return s
   254  	}
   255  
   256  	// Copy out the symbol table and order it lexicographically, keeping track
   257  	// of the inverse permutation. The inverse will give us what we need to
   258  	// remap the rest of the data.
   259  	syms := make([]string, len(s.symid))
   260  	for sym, id := range s.symid {
   261  		syms[int(id)] = sym
   262  	}
   263  	sinv := sortInverse(sort.StringSlice(syms))
   264  	smap := func(i id) id { return id(sinv[int(i)]) }
   265  
   266  	// Set up the new symbol table...
   267  	out := New(s.opts)
   268  	out.addCalls = s.addCalls
   269  	out.addErrors = s.addErrors
   270  	for i, sym := range syms {
   271  		if id := out.putsym(sym); int(id) != i {
   272  			panic("symbol table corrupted")
   273  		}
   274  	}
   275  
   276  	// Copy out the nodes table and rewrite all the values in terms of the
   277  	// remapped symbol table.
   278  	nodes := make([]node, len(s.nodes))
   279  	for n, nid := range s.nodes {
   280  		nodes[int(nid)] = node{
   281  			signature: smap(n.signature),
   282  			corpus:    smap(n.corpus),
   283  			root:      smap(n.root),
   284  			path:      smap(n.path),
   285  			language:  smap(n.language),
   286  		}
   287  	}
   288  	ninv := sortInverse(byNode(nodes))
   289  	nmap := func(n nid) nid { return nid(ninv[int(n)]) }
   290  
   291  	// Set up the new nodes table...
   292  	for i, node := range nodes {
   293  		if nid := out.addNode(node); int(nid) != i {
   294  			panic("node table corrupted")
   295  		}
   296  	}
   297  
   298  	// Update all the facts...
   299  	for oid, facts := range s.facts {
   300  		nid := nmap(oid)
   301  		for f := range facts {
   302  			out.addFact(nid, fact{
   303  				name:  smap(f.name),
   304  				value: smap(f.value),
   305  			})
   306  		}
   307  	}
   308  
   309  	// Update all the edges...
   310  	for oid, edges := range s.edges {
   311  		nid := nmap(oid)
   312  		for e := range edges {
   313  			out.addEdge(nid, edge{
   314  				kind:   smap(e.kind),
   315  				target: nmap(e.target),
   316  			})
   317  		}
   318  	}
   319  	*s = *out
   320  	s.canon = true
   321  	return s
   322  }
   323  
   324  // Encode constructs a canonical version of s and renders it into a
   325  // kythe.storage.EntrySet protobuf message.
   326  func (s *Set) Encode() *espb.EntrySet {
   327  	s.Canonicalize()
   328  	es := &espb.EntrySet{
   329  		Nodes:      make([]*espb.EntrySet_Node, len(s.nodes)),
   330  		FactGroups: make([]*espb.EntrySet_FactGroup, len(s.nodes)),
   331  		EdgeGroups: make([]*espb.EntrySet_EdgeGroup, len(s.nodes)),
   332  		Symbols:    make([]*espb.EntrySet_String, len(s.symid)-1), // skip ""
   333  	}
   334  	for i := 0; i < len(s.nodes); i++ {
   335  		nid := nid(i)
   336  		n := s.node(nid)
   337  		es.Nodes[i] = &espb.EntrySet_Node{
   338  			Corpus:    int32(n.corpus),
   339  			Language:  int32(n.language),
   340  			Path:      int32(n.path),
   341  			Root:      int32(n.root),
   342  			Signature: int32(n.signature),
   343  		}
   344  
   345  		facts := sortedFacts(s.facts[nid])
   346  		es.FactGroups[i] = &espb.EntrySet_FactGroup{
   347  			Facts: make([]*espb.EntrySet_Fact, len(facts)),
   348  		}
   349  		for j, f := range facts {
   350  			es.FactGroups[i].Facts[j] = &espb.EntrySet_Fact{
   351  				Name:  int32(f.name),
   352  				Value: int32(f.value),
   353  			}
   354  		}
   355  
   356  		edges := sortedEdges(s.edges[nid])
   357  		es.EdgeGroups[i] = &espb.EntrySet_EdgeGroup{
   358  			Edges: make([]*espb.EntrySet_Edge, len(edges)),
   359  		}
   360  		for j, e := range edges {
   361  			es.EdgeGroups[i].Edges[j] = &espb.EntrySet_Edge{
   362  				Kind:   int32(e.kind),
   363  				Target: int32(e.target),
   364  			}
   365  		}
   366  	}
   367  
   368  	// Pack the string table.
   369  	prev := ""
   370  	for i := 1; i < len(s.symid); i++ { // start at 1 to skip ""
   371  		sym := s.symbol(id(i))
   372  		pfx := lcp(prev, sym)
   373  		es.Symbols[i-1] = &espb.EntrySet_String{
   374  			Prefix: int32(pfx),
   375  			Suffix: []byte(sym[pfx:]),
   376  		}
   377  		prev = sym
   378  	}
   379  	return es
   380  }
   381  
   382  // WriteTo writes s to w as a wire-format EntrySet message.
   383  func (s *Set) WriteTo(w io.Writer) (int64, error) {
   384  	bits, err := proto.Marshal(s.Encode())
   385  	if err != nil {
   386  		return 0, err
   387  	}
   388  	nw, err := w.Write(bits)
   389  	return int64(nw), err
   390  }
   391  
   392  // Unmarshal unmarshals a wire-format EntrySet message into a *Set.
   393  func Unmarshal(data []byte) (*Set, error) {
   394  	var es espb.EntrySet
   395  	if err := proto.Unmarshal(data, &es); err != nil {
   396  		return nil, err
   397  	}
   398  	return Decode(&es)
   399  }
   400  
   401  // Decode constructs a set from a protobuf representation.
   402  // The resulting set will be canonical if the encoding was; if the message was
   403  // encoded by the Encode method of a *Set it will be so.
   404  func Decode(es *espb.EntrySet) (*Set, error) {
   405  	s := New(nil)
   406  
   407  	// Sanity checks: There must be equal numbers of nodes, fact groups, and
   408  	// edge groups in the message. This simplifies the scanning logic below.
   409  	n := len(es.Nodes)
   410  	if len(es.FactGroups) != n || len(es.EdgeGroups) != n {
   411  		return nil, fmt.Errorf("entryset: invalid counts: %d nodes, %d fact groups, %d edge groups",
   412  			n, len(es.FactGroups), len(es.EdgeGroups))
   413  	}
   414  
   415  	// Unpack the string table. The empty string was already added by New.
   416  	prev := ""
   417  	for i, sym := range es.Symbols {
   418  		n := int(sym.Prefix)
   419  		if n > len(prev) {
   420  			return nil, fmt.Errorf("entryset: invalid symbol table: prefix length %d > %d", n, len(prev))
   421  		}
   422  		cur := prev[:n] + string(sym.Suffix)
   423  		if sid := s.enter(cur); int(sid) != i+1 {
   424  			return nil, fmt.Errorf("entryset: symbol index error %d ≠ %d", sid, i+1)
   425  		}
   426  		prev = cur
   427  	}
   428  
   429  	// Unpack the nodes, facts, and edges.
   430  	for i := 0; i < n; i++ {
   431  		new := node{
   432  			corpus:    id(es.Nodes[i].Corpus),
   433  			language:  id(es.Nodes[i].Language),
   434  			path:      id(es.Nodes[i].Path),
   435  			root:      id(es.Nodes[i].Root),
   436  			signature: id(es.Nodes[i].Signature),
   437  		}
   438  		if err := s.checkBounds(new.corpus, new.language, new.path, new.root, new.signature); err != nil {
   439  			return nil, err
   440  		}
   441  		cur := s.addNode(new)
   442  		if int(cur) != i {
   443  			return nil, fmt.Errorf("entryset: node index error: %d ≠ %d", cur, i)
   444  		}
   445  		for _, f := range es.FactGroups[i].GetFacts() {
   446  			if f != nil {
   447  				new := fact{name: id(f.Name), value: id(f.Value)}
   448  				if err := s.checkBounds(new.name, new.value); err != nil {
   449  					return nil, err
   450  				}
   451  				s.addFact(cur, new)
   452  			}
   453  		}
   454  		for _, e := range es.EdgeGroups[i].GetEdges() {
   455  			if e != nil {
   456  				new := edge{kind: id(e.Kind), target: nid(e.Target)}
   457  				if err := s.checkBounds(new.kind); err != nil {
   458  					return nil, err
   459  				} else if t := int(new.target); t < 0 || t >= n {
   460  					return nil, fmt.Errorf("entryset: target id %d out of bounds", t)
   461  				}
   462  				s.addEdge(cur, new)
   463  			}
   464  		}
   465  	}
   466  	return s, nil
   467  }
   468  
   469  // lcp returns the length of the longest common prefix of a and b, in bytes.
   470  func lcp(a, b string) int {
   471  	i := 0
   472  	for i < len(a) && i < len(b) && a[i] == b[i] {
   473  		i++
   474  	}
   475  	return i
   476  }
   477  
   478  // sortInverse sorts v lexicographically in-place, and returns the inverse
   479  // permutation of v, so that inv[i] is the new offset of the value that was
   480  // originally at offset i in the unsorted collection.  Thus, v'[inv[i]] = v[i]
   481  // for all 0 ≤ i < v.Len().
   482  func sortInverse(v sort.Interface) (inv []int) {
   483  	p := rperm{
   484  		vals: v,
   485  		perm: make([]int, v.Len()),
   486  		inv:  make([]int, v.Len()),
   487  	}
   488  	for i := range p.perm {
   489  		p.inv[i] = i
   490  		p.perm[i] = i
   491  	}
   492  	sort.Sort(p)
   493  	return p.inv
   494  }
   495  
   496  // rperm implements sort.Interface by dispatching to another sortable type.  In
   497  // addition, it computes an inverse permutation mapping.
   498  type rperm struct {
   499  	vals sort.Interface
   500  
   501  	perm []int // perm[i] is the original offset of the string now at offset i
   502  	inv  []int // inv[i] is the current offset of the string originally at offset i
   503  }
   504  
   505  func (r rperm) Len() int           { return r.vals.Len() }
   506  func (r rperm) Less(i, j int) bool { return r.vals.Less(i, j) }
   507  
   508  func (r rperm) Swap(i, j int) {
   509  	r.vals.Swap(i, j)
   510  	r.perm[i], r.perm[j] = r.perm[j], r.perm[i]
   511  	r.inv[r.perm[i]] = i
   512  	r.inv[r.perm[j]] = j
   513  }
   514  
   515  type byNode []node
   516  
   517  func (b byNode) Len() int           { return len(b) }
   518  func (b byNode) Swap(i, j int)      { b[i], b[j] = b[j], b[i] }
   519  func (b byNode) Less(i, j int) bool { return b[i].compare(b[j]) < 0 }
   520  
   521  type byFact []fact
   522  
   523  func (b byFact) Len() int           { return len(b) }
   524  func (b byFact) Swap(i, j int)      { b[i], b[j] = b[j], b[i] }
   525  func (b byFact) Less(i, j int) bool { return b[i].compare(b[j]) < 0 }
   526  
   527  type byEdge []edge
   528  
   529  func (b byEdge) Len() int           { return len(b) }
   530  func (b byEdge) Swap(i, j int)      { b[i], b[j] = b[j], b[i] }
   531  func (b byEdge) Less(i, j int) bool { return b[i].compare(b[j]) < 0 }
   532  
   533  type id int  // symbol identifier
   534  type nid int // node identifier
   535  
   536  type node struct {
   537  	signature id
   538  	corpus    id
   539  	root      id
   540  	path      id
   541  	language  id
   542  }
   543  
   544  func (n node) compare(o node) int {
   545  	for _, v := range [...]id{
   546  		n.corpus - o.corpus,
   547  		n.language - o.language,
   548  		n.path - o.path,
   549  		n.root - o.root,
   550  		n.signature - o.signature,
   551  	} {
   552  		if v != 0 {
   553  			return int(v)
   554  		}
   555  	}
   556  	return 0
   557  }
   558  
   559  type fact struct{ name, value id }
   560  
   561  func (f fact) compare(o fact) int {
   562  	if n := f.name - o.name; n != 0 {
   563  		return int(n)
   564  	}
   565  	return int(f.value - o.value)
   566  }
   567  
   568  // sortedFacts unpacks the keys of m and returns them canonically ordered.
   569  func sortedFacts(m map[fact]struct{}) []fact {
   570  	facts := make([]fact, 0, len(m))
   571  	for f := range m {
   572  		facts = append(facts, f)
   573  	}
   574  	sort.Sort(byFact(facts))
   575  	return facts
   576  }
   577  
   578  type edge struct {
   579  	kind   id
   580  	target nid
   581  }
   582  
   583  func (e edge) compare(o edge) int {
   584  	if n := e.kind - o.kind; n != 0 {
   585  		return int(n)
   586  	}
   587  	return int(e.target - o.target)
   588  }
   589  
   590  // sortedEdges unpacks the keys of m and returns them canonically ordered.
   591  func sortedEdges(m map[edge]struct{}) []edge {
   592  	edges := make([]edge, 0, len(m))
   593  	for e := range m {
   594  		edges = append(edges, e)
   595  	}
   596  	sort.Sort(byEdge(edges))
   597  	return edges
   598  }
   599  
   600  func (s *Set) putsym(sym string) id {
   601  	if id, ok := s.symid[sym]; ok {
   602  		return id
   603  	}
   604  	s.syms = nil    // new data resets the lookup
   605  	s.canon = false // new data invalidates canonical form
   606  	next := id(len(s.symid))
   607  	s.symid[sym] = next
   608  	return next
   609  }
   610  
   611  // enter adds a string to the symbol table and returns its ID.
   612  // Duplicate symbols are given the same ID each time.
   613  func (s *Set) enter(sym string) id {
   614  	next := s.putsym(sym)
   615  	if int(next) < len(s.symid)-1 {
   616  		return next
   617  	}
   618  
   619  	// If the symbol exceeds the length cap, add prefixes of it to the table so
   620  	// that prefix compression will fall under the cap. For example, if sym is
   621  	//
   622  	//    01234566789abcdef01234566789abcdef01234566789abc
   623  	//    ^^^^^^^^^^^^^^^^^^^|
   624  	//                       cap
   625  	//
   626  	// then we will add prefixes at multiples of cap until the last one fits:
   627  	//
   628  	//    01234566789abcdef01234566789abcdef01234566789abc ← sym
   629  	//    01234566789abcdef01234566789abcdef012345
   630  	//    01234566789abcdef012                   △ 2*cap
   631  	//                       △ 1*cap
   632  	//
   633  	// When canonicalized and prefix-coded, these will collapse to:
   634  	//
   635  	//    01234566789abcdef012
   636  	//    <1*cap>34566789abcdef012345
   637  	//    <2*cap>566789abc
   638  	//
   639  	if cap := s.opts.maxSymbolBytes(); cap > 0 {
   640  		for n := len(sym) / cap; n > 0; n-- {
   641  			s.putsym(sym[:n*cap])
   642  		}
   643  	}
   644  	return next
   645  }
   646  
   647  // symbol returns the string corresponding to the given symbol ID.
   648  func (s *Set) symbol(id id) string {
   649  	// If necessary, (re)initialize the lookup table.
   650  	if s.syms == nil {
   651  		s.syms = make([]string, len(s.symid))
   652  		for sym, id := range s.symid {
   653  			s.syms[int(id)] = sym
   654  		}
   655  	}
   656  	return s.syms[int(id)]
   657  }
   658  
   659  // node returns the node corresponding to the given node ID.
   660  func (s *Set) node(nid nid) node {
   661  	// If necessary, (re)initialize the lookup table.
   662  	if s.nids == nil {
   663  		s.nids = make([]node, len(s.nodes))
   664  		for n, nid := range s.nodes {
   665  			s.nids[int(nid)] = n
   666  		}
   667  	}
   668  	return s.nids[int(nid)]
   669  }
   670  
   671  // addNode adds a (possibly new) node to the set, and returns its node ID.
   672  // Duplicate nodes are given the same ID each time.
   673  func (s *Set) addNode(n node) nid {
   674  	if id, ok := s.nodes[n]; ok {
   675  		return id
   676  	}
   677  	s.nids = nil    // new data resets the lookup
   678  	s.canon = false // new data invalidates canonical form
   679  	next := nid(len(s.nodes))
   680  	s.nodes[n] = next
   681  	return next
   682  }
   683  
   684  // addVName constructs a node from v and passes it to addNode.
   685  func (s *Set) addVName(v *spb.VName) nid {
   686  	return s.addNode(node{
   687  		signature: s.enter(v.Signature),
   688  		corpus:    s.enter(v.Corpus),
   689  		root:      s.enter(v.Root),
   690  		path:      s.enter(v.Path),
   691  		language:  s.enter(v.Language),
   692  	})
   693  }
   694  
   695  // vname returns a VName protobuf equivalent to n.
   696  func (s *Set) vname(n node) *spb.VName {
   697  	return &spb.VName{
   698  		Signature: s.symbol(n.signature),
   699  		Corpus:    s.symbol(n.corpus),
   700  		Path:      s.symbol(n.path),
   701  		Root:      s.symbol(n.root),
   702  		Language:  s.symbol(n.language),
   703  	}
   704  }
   705  
   706  // ticket returns a Kythe ticket equivalent to n.
   707  func (s *Set) ticket(n node) string {
   708  	return (&kytheuri.URI{
   709  		Signature: s.symbol(n.signature),
   710  		Corpus:    s.symbol(n.corpus),
   711  		Path:      s.symbol(n.path),
   712  		Root:      s.symbol(n.root),
   713  		Language:  s.symbol(n.language),
   714  	}).String()
   715  }
   716  
   717  // addFact adds f as a fact related to n.
   718  func (s *Set) addFact(n nid, f fact) {
   719  	if s.facts[n] == nil {
   720  		s.facts[n] = map[fact]struct{}{f: struct{}{}}
   721  		s.canon = false
   722  	} else if _, ok := s.facts[n][f]; !ok {
   723  		s.facts[n][f] = struct{}{}
   724  		s.canon = false
   725  	}
   726  }
   727  
   728  // addEdge adds e as an outbound edges from n.
   729  func (s *Set) addEdge(n nid, e edge) {
   730  	if s.edges[n] == nil {
   731  		s.edges[n] = map[edge]struct{}{e: struct{}{}}
   732  		s.canon = false
   733  	} else if _, ok := s.edges[n][e]; !ok {
   734  		s.edges[n][e] = struct{}{}
   735  		s.canon = false
   736  	}
   737  }
   738  
   739  // checkBounds returns nil if all the symbol ids given are in bounds.  This
   740  // requires that the symbol table has already been populated.
   741  func (s *Set) checkBounds(ids ...id) error {
   742  	for _, id := range ids {
   743  		if id < 0 || int(id) >= len(s.symid) {
   744  			return fmt.Errorf("entryset: symid %d out of bounds", id)
   745  		}
   746  	}
   747  	return nil
   748  }