kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/util/metadata/metadata.go (about)

     1  /*
     2   * Copyright 2017 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  // Package metadata provides support code for processing Kythe metadata
    18  // records, of the type generated by instrumented code generators for
    19  // cross-language linkage.
    20  package metadata // import "kythe.io/kythe/go/util/metadata"
    21  
    22  import (
    23  	"encoding/json"
    24  	"errors"
    25  	"fmt"
    26  	"io"
    27  	"strconv"
    28  	"strings"
    29  
    30  	"kythe.io/kythe/go/util/schema/edges"
    31  
    32  	"google.golang.org/protobuf/encoding/protojson"
    33  
    34  	protopb "google.golang.org/protobuf/types/descriptorpb"
    35  	spb "kythe.io/kythe/proto/storage_go_proto"
    36  )
    37  
    38  // TODO(fromberger): Add a link to the format documentation here.
    39  
// Rules is a sequence of metadata rules. Its MarshalJSON method encodes the
// whole sequence as a single JSON metadata file object.
type Rules []Rule
    42  
    43  // MarshalJSON encodes the specified rule set as a JSON file.
    44  func (rs Rules) MarshalJSON() ([]byte, error) {
    45  	f := file{
    46  		Type: fileType,
    47  		Meta: make([]rule, len(rs)),
    48  	}
    49  	for i, r := range rs {
    50  		kind := r.EdgeOut
    51  		if r.Reverse {
    52  			kind = edges.Mirror(kind)
    53  		}
    54  		rtype := "nop"
    55  		if r.EdgeIn == edges.DefinesBinding {
    56  			rtype = "anchor_defines"
    57  		}
    58  		var v json.RawMessage
    59  		if r.VName != nil {
    60  			var err error
    61  			v, err = protojson.Marshal(r.VName)
    62  			if err != nil {
    63  				return nil, err
    64  			}
    65  		}
    66  		f.Meta[i] = rule{
    67  			Type:     rtype,
    68  			Begin:    r.Begin,
    69  			End:      r.End,
    70  			VName:    v,
    71  			Edge:     kind,
    72  			Semantic: r.Semantic,
    73  		}
    74  	}
    75  	return json.Marshal(f)
    76  }
    77  
// Semantic is an alias for protopb.GeneratedCodeInfo_Annotation_Semantic,
// re-exported under a name local to this package.
type Semantic = protopb.GeneratedCodeInfo_Annotation_Semantic
    80  
// Re-exports of the Semantic enum values from protopb, under names local to
// this package.
var (
	SemanticNone  Semantic = protopb.GeneratedCodeInfo_Annotation_NONE
	SemanticSet   Semantic = protopb.GeneratedCodeInfo_Annotation_SET
	SemanticAlias Semantic = protopb.GeneratedCodeInfo_Annotation_ALIAS
)
    87  
    88  // A Rule denotes a single metadata rule, associating type linkage information
    89  // for an anchor spanning a given range of text.
    90  type Rule struct {
    91  	// The Begin and End fields represent a half-closed interval of byte
    92  	// positions to match. Begin is inclusive, End is exclusive.
    93  	Begin, End int
    94  
    95  	EdgeIn  string     // edge kind to match over the anchor spanned
    96  	EdgeOut string     // outbound edge kind to emit
    97  	VName   *spb.VName // the vname to create an edge to or from
    98  	Reverse bool       // whether to draw to vname (false) or from it (true)
    99  
   100  	Semantic *Semantic // whether to apply special semantics.
   101  }
   102  
   103  // The types below are intermediate structures used for JSON marshaling.
   104  
// fileType is the protocol marker stored in the "type" field of an encoded
// metadata file. MarshalJSON writes it, and Parse rejects any input whose tag
// differs from it.
const fileType = "kythe0"
   106  
// A file is the JSON representation of an encoded set of rules: a format tag
// plus the list of encoded rules.
type file struct {
	Type string `json:"type"`           // required: must equal fileType
	Meta []rule `json:"meta,omitempty"` // encoded rules; omitted from output when empty
}
   112  
// A rule is the JSON representation of a single Rule. MarshalJSON fills it in
// from a Rule, and Parse converts it back.
type rule struct {
	Type  string          `json:"type"`            // rule type: "nop" or "anchor_defines"
	Begin int             `json:"begin"`           // copied to and from Rule.Begin
	End   int             `json:"end"`             // copied to and from Rule.End
	Edge  string          `json:"edge,omitempty"`  // edge kind; mirrored form when the Rule is reversed
	VName json.RawMessage `json:"vname,omitempty"` // protojson encoding of the vname; absent when the Rule has none

	Semantic *Semantic `json:"semantic,omitempty"` // copied to and from Rule.Semantic
}
   123  
   124  // Parse parses a single JSON metadata object from r and returns the
   125  // corresponding rules. It is an error if there are extra data after the
   126  // metadata object, or if the type tag of the object does not match the current
   127  // format code.
   128  func Parse(r io.Reader) (Rules, error) {
   129  	dec := json.NewDecoder(r)
   130  	var f file
   131  	if err := dec.Decode(&f); err != nil {
   132  		return nil, fmt.Errorf("metadata: invalid file: %v", err)
   133  	} else if _, err := dec.Token(); err != io.EOF {
   134  		return nil, errors.New("metadata: extra junk at end of input")
   135  	} else if f.Type != fileType {
   136  		return nil, fmt.Errorf("metadata: wrong type tag: %q", f.Type)
   137  	}
   138  
   139  	rs := make(Rules, len(f.Meta))
   140  	for i, meta := range f.Meta {
   141  		var v *spb.VName
   142  		if len(meta.VName) != 0 {
   143  			var msg spb.VName
   144  			if err := protojson.Unmarshal(meta.VName, &msg); err != nil {
   145  				return nil, err
   146  			}
   147  			v = &msg
   148  		}
   149  		rs[i] = Rule{
   150  			Begin:    meta.Begin,
   151  			End:      meta.End,
   152  			EdgeOut:  edges.Canonical(meta.Edge),
   153  			Reverse:  edges.IsReverse(meta.Edge),
   154  			VName:    v,
   155  			Semantic: meta.Semantic,
   156  		}
   157  		switch t := meta.Type; t {
   158  		case "nop":
   159  			// ok, no special behaviour
   160  		case "anchor_defines":
   161  			rs[i].EdgeIn = edges.DefinesBinding
   162  		default:
   163  			return nil, fmt.Errorf("metadata: unknown rule type: %q", t)
   164  		}
   165  	}
   166  	return rs, nil
   167  }
   168  
   169  // FromGeneratedCodeInfo constructs a set of rules from the corresponding
   170  // protobuf descriptor message and the vname of the metadata file from which
   171  // the generated descriptor was loaded.
   172  func FromGeneratedCodeInfo(msg *protopb.GeneratedCodeInfo, vname *spb.VName) Rules {
   173  	rs := make(Rules, len(msg.Annotation))
   174  	for i, anno := range msg.Annotation {
   175  		// Convert the path to a dot-separated string, e.g., 1.0.3.2,
   176  		// for use in the vname signature.
   177  		sig := make([]string, len(anno.Path))
   178  		for i, elt := range anno.Path {
   179  			sig[i] = strconv.Itoa(int(elt))
   180  		}
   181  
   182  		// TODO(fromberger): Work out how to derive the correct corpus and root
   183  		// labels. When the protobuf source file is in the same corpus as its
   184  		// metadata, this will work as-is.
   185  		//
   186  		// If the protobuf inputs live in a different corpus, it will be
   187  		// necessary to make the extractor map the metadata file to the correct
   188  		// corpus and root at build time. Since the metadata file does not get
   189  		// pointed to directly, this ensures we get the right contact.
   190  		//
   191  		// This does NOT solve how to deal with generated .proto files, but
   192  		// that is a much less common case, and we can address it separately.
   193  		vname := &spb.VName{
   194  			Corpus:    vname.GetCorpus(),
   195  			Root:      vname.GetRoot(),
   196  			Path:      anno.GetSourceFile(),
   197  			Language:  "protobuf",
   198  			Signature: strings.Join(sig, "."),
   199  		}
   200  		rs[i] = Rule{
   201  			EdgeIn:   edges.DefinesBinding,
   202  			EdgeOut:  edges.Generates,
   203  			Reverse:  true,
   204  			Begin:    int(anno.GetBegin()),
   205  			End:      int(anno.GetEnd()),
   206  			VName:    vname,
   207  			Semantic: anno.Semantic,
   208  		}
   209  	}
   210  	return rs
   211  }