kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/util/vnameutil/rewrite.go (about)

     1  /*
     2   * Copyright 2014 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  // Package vnameutil provides utilities for generating consistent VNames from
    18  // common path-like values (e.g., filenames, import paths).
    19  package vnameutil // import "kythe.io/kythe/go/util/vnameutil"
    20  
    21  import (
    22  	"bytes"
    23  	"encoding/json"
    24  	"fmt"
    25  	"io"
    26  	"os"
    27  	"regexp"
    28  	"strings"
    29  
    30  	"google.golang.org/protobuf/encoding/protojson"
    31  	"google.golang.org/protobuf/proto"
    32  
    33  	spb "kythe.io/kythe/proto/storage_go_proto"
    34  )
    35  
// A Rule associates a regular expression pattern with a VName template.  A
// Rule can be applied to a string to produce a VName.
//
// Both members are embedded pointers.  Rules produced by ConvertRule carry a
// pattern that is anchored at both ends and a template whose fields use the
// ${n} marker syntax understood by regexp.Expand.
type Rule struct {
	*regexp.Regexp // A pattern to match against an input string
	*spb.VName     // A template to populate with matches from the input
}
    42  
    43  // Apply reports whether input matches the regexp associated with r.  If so, it
    44  // returns a VName whose fields have values taken from r.VName, with submatches
    45  // populated from the input string.
    46  //
    47  // Submatch replacement is done using regexp.ExpandString, so the same syntax
    48  // is supported for specifying replacements.
    49  func (r Rule) Apply(input string) (*spb.VName, bool) {
    50  	m := r.FindStringSubmatchIndex(input)
    51  	if m == nil {
    52  		return nil, false
    53  	}
    54  	return &spb.VName{
    55  		Corpus:    r.expand(m, input, r.Corpus),
    56  		Path:      r.expand(m, input, r.Path),
    57  		Root:      r.expand(m, input, r.Root),
    58  		Signature: r.expand(m, input, r.Signature),
    59  	}, true
    60  }
    61  
    62  // ToProto returns an equivalent VNameRewriteRule proto.
    63  func (r Rule) ToProto() *spb.VNameRewriteRule {
    64  	return &spb.VNameRewriteRule{
    65  		Pattern: trimAnchors(r.Regexp.String()),
    66  		VName: &spb.VName{
    67  			Corpus:    unfixTemplate(r.VName.Corpus),
    68  			Root:      unfixTemplate(r.VName.Root),
    69  			Path:      unfixTemplate(r.VName.Path),
    70  			Language:  unfixTemplate(r.VName.Language),
    71  			Signature: unfixTemplate(r.VName.Signature),
    72  		},
    73  	}
    74  }
    75  
    76  // String returns a debug string of r.
    77  func (r Rule) String() string { return r.ToProto().String() }
    78  
    79  // MarshalJSON implements the json.Marshaler interface.
    80  func (r Rule) MarshalJSON() ([]byte, error) {
    81  	return protojson.Marshal(r.ToProto())
    82  }
    83  
    84  // UnmarshalJSON implements the json.Unmarshaler interface.
    85  func (r *Rule) UnmarshalJSON(rec []byte) error {
    86  	var p spb.VNameRewriteRule
    87  	if err := protojson.Unmarshal(rec, &p); err != nil {
    88  		return err
    89  	}
    90  	rule, err := ConvertRule(&p)
    91  	if err != nil {
    92  		return err
    93  	}
    94  	*r = rule
    95  	return nil
    96  }
    97  
    98  func (r Rule) expand(match []int, input, template string) string {
    99  	return string(r.ExpandString(nil, template, input, match))
   100  }
   101  
// Rules is an ordered list of rewriting rules.  Applying a group of rules
// tries each rule in slice order, and returns the result of the first one
// that matches.
type Rules []Rule
   106  
   107  // Apply applies each rule in to the input in sequence, returning the first
   108  // successful match.  If no rules apply, returns (nil, false).
   109  func (r Rules) Apply(input string) (*spb.VName, bool) {
   110  	for _, rule := range r {
   111  		if v, ok := rule.Apply(input); ok {
   112  			return v, true
   113  		}
   114  	}
   115  	return nil, false
   116  }
   117  
   118  // ApplyDefault acts as r.Apply, but returns v there is no matching rule.
   119  func (r Rules) ApplyDefault(input string, v *spb.VName) *spb.VName {
   120  	if hit, ok := r.Apply(input); ok {
   121  		return hit
   122  	}
   123  	return v
   124  }
   125  
   126  // ToProto returns an equivalent VNameRewriteRules proto.
   127  func (r Rules) ToProto() *spb.VNameRewriteRules {
   128  	pb := &spb.VNameRewriteRules{
   129  		Rule: make([]*spb.VNameRewriteRule, len(r)),
   130  	}
   131  	for i, rule := range r {
   132  		pb.Rule[i] = rule.ToProto()
   133  	}
   134  	return pb
   135  }
   136  
   137  // Marshal implements the proto.Marshaler interface.
   138  func (r Rules) Marshal() ([]byte, error) { return proto.Marshal(r.ToProto()) }
   139  
   140  // ConvertRule compiles a VNameRewriteRule proto into a Rule that can be applied to strings.
   141  func ConvertRule(r *spb.VNameRewriteRule) (Rule, error) {
   142  	pattern := "^" + trimAnchors(r.Pattern) + "$"
   143  	re, err := regexp.Compile(pattern)
   144  	if err != nil {
   145  		return Rule{}, fmt.Errorf("invalid regular expression: %v", err)
   146  	}
   147  	return Rule{
   148  		Regexp: re,
   149  		VName: &spb.VName{
   150  			Corpus:    fixTemplate(r.VName.GetCorpus()),
   151  			Path:      fixTemplate(r.VName.GetPath()),
   152  			Root:      fixTemplate(r.VName.GetRoot()),
   153  			Language:  fixTemplate(r.VName.GetLanguage()),
   154  			Signature: fixTemplate(r.VName.GetSignature()),
   155  		},
   156  	}, nil
   157  }
   158  
var (
	// anchorsRE matches a run of one or more "$" at the end of a pattern,
	// provided the run is preceded by an even number (possibly zero) of
	// backslashes.  The match also includes those backslashes and the
	// non-backslash character before them, if any.
	anchorsRE = regexp.MustCompile(`([^\\]|^)(\\\\)*\$+$`)
	// fieldRE matches an @x@ marker, where x is one or more word characters.
	fieldRE   = regexp.MustCompile(`@(\w+)@`)
	// markerRE matches a ${x} marker that is preceded by an even number
	// (possibly zero) of "$" characters.  The match also includes those "$"
	// characters and the non-"$" character before them, if any.
	markerRE  = regexp.MustCompile(`([^$]|^)(\$\$)*\${\w+}`)
)
   164  
   165  func trimAnchors(pattern string) string {
   166  	return anchorsRE.ReplaceAllStringFunc(strings.TrimPrefix(pattern, "^"), func(r string) string {
   167  		return strings.TrimSuffix(r, "$")
   168  	})
   169  }
   170  
   171  // fixTemplate rewrites @x@ markers in the template to the ${x} markers used by
   172  // the regexp.Expand function, to simplify rewriting.
   173  func fixTemplate(s string) string {
   174  	if s == "" {
   175  		return ""
   176  	}
   177  	return fieldRE.ReplaceAllStringFunc(strings.Replace(s, "$", "$$", -1),
   178  		func(s string) string {
   179  			return "${" + strings.Trim(s, "@") + "}"
   180  		})
   181  }
   182  
   183  func unfixTemplate(s string) string {
   184  	return strings.Replace(markerRE.ReplaceAllStringFunc(s, func(s string) string {
   185  		var prefix int
   186  		for ; !strings.HasPrefix(s[prefix:], "${"); prefix++ {
   187  		}
   188  		return s[:prefix] + "@" + strings.TrimPrefix(strings.TrimSuffix(s[prefix:], "}"), "${") + "@"
   189  	}), "$$", "$", -1)
   190  }
   191  
   192  func expectDelim(de *json.Decoder, expected json.Delim) error {
   193  	if tok, err := de.Token(); err != nil {
   194  		return err
   195  	} else if delim, ok := tok.(json.Delim); !ok || delim != expected {
   196  		return fmt.Errorf("expected %s; found %v", expected, tok)
   197  	}
   198  	return nil
   199  }
   200  
   201  // ParseProtoRules reads a wire-encoded *spb.VNameRewriteRules.
   202  func ParseProtoRules(data []byte) (Rules, error) {
   203  	var pb spb.VNameRewriteRules
   204  	if err := proto.Unmarshal(data, &pb); err != nil {
   205  		return nil, err
   206  	}
   207  	rules := make(Rules, len(pb.Rule))
   208  	for i, rp := range pb.Rule {
   209  		r, err := ConvertRule(rp)
   210  		if err != nil {
   211  			return nil, err
   212  		}
   213  		rules[i] = r
   214  	}
   215  	return rules, nil
   216  }
   217  
   218  // ParseRules reads Rules from JSON-encoded data in a byte array.
   219  func ParseRules(data []byte) (Rules, error) { return ReadRules(bytes.NewReader(data)) }
   220  
   221  // ReadRules parses Rules from JSON-encoded data in the following format:
   222  //
   223  //	[
   224  //	  {
   225  //	    "pattern": "re2_regex_pattern",
   226  //	    "vname": {
   227  //	      "corpus": "corpus_template",
   228  //	      "root": "root_template",
   229  //	      "path": "path_template"
   230  //	    }
   231  //	  }, ...
   232  //	]
   233  //
   234  // Each pattern is an RE2 regexp pattern.  Patterns are implicitly anchored at
   235  // both ends.  The template strings may contain markers of the form @n@, that
   236  // will be replaced by the n'th regexp group on a successful input match.
   237  func ReadRules(r io.Reader) (Rules, error) {
   238  	de := json.NewDecoder(r)
   239  
   240  	// Check for start of array.
   241  	if err := expectDelim(de, '['); err != nil {
   242  		return nil, err
   243  	}
   244  
   245  	// Parse each element of the array as a VNameRewriteRule.
   246  	rules := Rules{}
   247  	for de.More() {
   248  		var raw json.RawMessage
   249  		if err := de.Decode(&raw); err != nil {
   250  			return nil, err
   251  		}
   252  		var pb spb.VNameRewriteRule
   253  		if err := protojson.Unmarshal(raw, &pb); err != nil {
   254  			return nil, err
   255  		}
   256  		r, err := ConvertRule(&pb)
   257  		if err != nil {
   258  			return nil, err
   259  		}
   260  		rules = append(rules, r)
   261  	}
   262  
   263  	// Check for end of array.
   264  	if err := expectDelim(de, ']'); err != nil {
   265  		return nil, err
   266  	}
   267  
   268  	// Check for EOF
   269  	if tok, err := de.Token(); err != io.EOF {
   270  		if err != nil {
   271  			return nil, err
   272  		}
   273  		return nil, fmt.Errorf("expected EOF; found: %s", tok)
   274  	}
   275  
   276  	return rules, nil
   277  }
   278  
   279  // LoadRules loads and parses the vname mapping rules in path.
   280  // If path == "", this returns nil without error (no rules).
   281  func LoadRules(path string) (Rules, error) {
   282  	if path == "" {
   283  		return nil, nil
   284  	}
   285  	f, err := os.Open(path)
   286  	if err != nil {
   287  		return nil, fmt.Errorf("opening vname rules file: %v", err)
   288  	}
   289  	defer f.Close()
   290  	return ReadRules(f)
   291  }