kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/util/vnameutil/rewrite.go (about) 1 /* 2 * Copyright 2014 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Package vnameutil provides utilities for generating consistent VNames from 18 // common path-like values (e.g., filenames, import paths). 19 package vnameutil // import "kythe.io/kythe/go/util/vnameutil" 20 21 import ( 22 "bytes" 23 "encoding/json" 24 "fmt" 25 "io" 26 "os" 27 "regexp" 28 "strings" 29 30 "google.golang.org/protobuf/encoding/protojson" 31 "google.golang.org/protobuf/proto" 32 33 spb "kythe.io/kythe/proto/storage_go_proto" 34 ) 35 36 // A Rule associates a regular expression pattern with a VName template. A 37 // Rule can be applied to a string to produce a VName. 38 type Rule struct { 39 *regexp.Regexp // A pattern to match against an input string 40 *spb.VName // A template to populate with matches from the input 41 } 42 43 // Apply reports whether input matches the regexp associated with r. If so, it 44 // returns a VName whose fields have values taken from r.VName, with submatches 45 // populated from the input string. 46 // 47 // Submatch replacement is done using regexp.ExpandString, so the same syntax 48 // is supported for specifying replacements. 49 func (r Rule) Apply(input string) (*spb.VName, bool) { 50 m := r.FindStringSubmatchIndex(input) 51 if m == nil { 52 return nil, false 53 } 54 return &spb.VName{ 55 Corpus: r.expand(m, input, r.Corpus), 56 Path: r.expand(m, input, r.Path), 57 Root: r.expand(m, input, r.Root), 58 Signature: r.expand(m, input, r.Signature), 59 }, true 60 } 61 62 // ToProto returns an equivalent VNameRewriteRule proto. 63 func (r Rule) ToProto() *spb.VNameRewriteRule { 64 return &spb.VNameRewriteRule{ 65 Pattern: trimAnchors(r.Regexp.String()), 66 VName: &spb.VName{ 67 Corpus: unfixTemplate(r.VName.Corpus), 68 Root: unfixTemplate(r.VName.Root), 69 Path: unfixTemplate(r.VName.Path), 70 Language: unfixTemplate(r.VName.Language), 71 Signature: unfixTemplate(r.VName.Signature), 72 }, 73 } 74 } 75 76 // String returns a debug string of r. 77 func (r Rule) String() string { return r.ToProto().String() } 78 79 // MarshalJSON implements the json.Marshaler interface. 80 func (r Rule) MarshalJSON() ([]byte, error) { 81 return protojson.Marshal(r.ToProto()) 82 } 83 84 // UnmarshalJSON implements the json.Unmarshaler interface. 85 func (r *Rule) UnmarshalJSON(rec []byte) error { 86 var p spb.VNameRewriteRule 87 if err := protojson.Unmarshal(rec, &p); err != nil { 88 return err 89 } 90 rule, err := ConvertRule(&p) 91 if err != nil { 92 return err 93 } 94 *r = rule 95 return nil 96 } 97 98 func (r Rule) expand(match []int, input, template string) string { 99 return string(r.ExpandString(nil, template, input, match)) 100 } 101 102 // Rules are an ordered set of rewriting rules. Applying a group of rules 103 // tries each rule in sequence, and returns the result of the first one that 104 // matches. 105 type Rules []Rule 106 107 // Apply applies each rule in to the input in sequence, returning the first 108 // successful match. If no rules apply, returns (nil, false). 109 func (r Rules) Apply(input string) (*spb.VName, bool) { 110 for _, rule := range r { 111 if v, ok := rule.Apply(input); ok { 112 return v, true 113 } 114 } 115 return nil, false 116 } 117 118 // ApplyDefault acts as r.Apply, but returns v there is no matching rule. 119 func (r Rules) ApplyDefault(input string, v *spb.VName) *spb.VName { 120 if hit, ok := r.Apply(input); ok { 121 return hit 122 } 123 return v 124 } 125 126 // ToProto returns an equivalent VNameRewriteRules proto. 127 func (r Rules) ToProto() *spb.VNameRewriteRules { 128 pb := &spb.VNameRewriteRules{ 129 Rule: make([]*spb.VNameRewriteRule, len(r)), 130 } 131 for i, rule := range r { 132 pb.Rule[i] = rule.ToProto() 133 } 134 return pb 135 } 136 137 // Marshal implements the proto.Marshaler interface. 138 func (r Rules) Marshal() ([]byte, error) { return proto.Marshal(r.ToProto()) } 139 140 // ConvertRule compiles a VNameRewriteRule proto into a Rule that can be applied to strings. 141 func ConvertRule(r *spb.VNameRewriteRule) (Rule, error) { 142 pattern := "^" + trimAnchors(r.Pattern) + "$" 143 re, err := regexp.Compile(pattern) 144 if err != nil { 145 return Rule{}, fmt.Errorf("invalid regular expression: %v", err) 146 } 147 return Rule{ 148 Regexp: re, 149 VName: &spb.VName{ 150 Corpus: fixTemplate(r.VName.GetCorpus()), 151 Path: fixTemplate(r.VName.GetPath()), 152 Root: fixTemplate(r.VName.GetRoot()), 153 Language: fixTemplate(r.VName.GetLanguage()), 154 Signature: fixTemplate(r.VName.GetSignature()), 155 }, 156 }, nil 157 } 158 159 var ( 160 anchorsRE = regexp.MustCompile(`([^\\]|^)(\\\\)*\$+$`) 161 fieldRE = regexp.MustCompile(`@(\w+)@`) 162 markerRE = regexp.MustCompile(`([^$]|^)(\$\$)*\${\w+}`) 163 ) 164 165 func trimAnchors(pattern string) string { 166 return anchorsRE.ReplaceAllStringFunc(strings.TrimPrefix(pattern, "^"), func(r string) string { 167 return strings.TrimSuffix(r, "$") 168 }) 169 } 170 171 // fixTemplate rewrites @x@ markers in the template to the ${x} markers used by 172 // the regexp.Expand function, to simplify rewriting. 173 func fixTemplate(s string) string { 174 if s == "" { 175 return "" 176 } 177 return fieldRE.ReplaceAllStringFunc(strings.Replace(s, "$", "$$", -1), 178 func(s string) string { 179 return "${" + strings.Trim(s, "@") + "}" 180 }) 181 } 182 183 func unfixTemplate(s string) string { 184 return strings.Replace(markerRE.ReplaceAllStringFunc(s, func(s string) string { 185 var prefix int 186 for ; !strings.HasPrefix(s[prefix:], "${"); prefix++ { 187 } 188 return s[:prefix] + "@" + strings.TrimPrefix(strings.TrimSuffix(s[prefix:], "}"), "${") + "@" 189 }), "$$", "$", -1) 190 } 191 192 func expectDelim(de *json.Decoder, expected json.Delim) error { 193 if tok, err := de.Token(); err != nil { 194 return err 195 } else if delim, ok := tok.(json.Delim); !ok || delim != expected { 196 return fmt.Errorf("expected %s; found %v", expected, tok) 197 } 198 return nil 199 } 200 201 // ParseProtoRules reads a wire-encoded *spb.VNameRewriteRules. 202 func ParseProtoRules(data []byte) (Rules, error) { 203 var pb spb.VNameRewriteRules 204 if err := proto.Unmarshal(data, &pb); err != nil { 205 return nil, err 206 } 207 rules := make(Rules, len(pb.Rule)) 208 for i, rp := range pb.Rule { 209 r, err := ConvertRule(rp) 210 if err != nil { 211 return nil, err 212 } 213 rules[i] = r 214 } 215 return rules, nil 216 } 217 218 // ParseRules reads Rules from JSON-encoded data in a byte array. 219 func ParseRules(data []byte) (Rules, error) { return ReadRules(bytes.NewReader(data)) } 220 221 // ReadRules parses Rules from JSON-encoded data in the following format: 222 // 223 // [ 224 // { 225 // "pattern": "re2_regex_pattern", 226 // "vname": { 227 // "corpus": "corpus_template", 228 // "root": "root_template", 229 // "path": "path_template" 230 // } 231 // }, ... 232 // ] 233 // 234 // Each pattern is an RE2 regexp pattern. Patterns are implicitly anchored at 235 // both ends. The template strings may contain markers of the form @n@, that 236 // will be replaced by the n'th regexp group on a successful input match. 237 func ReadRules(r io.Reader) (Rules, error) { 238 de := json.NewDecoder(r) 239 240 // Check for start of array. 241 if err := expectDelim(de, '['); err != nil { 242 return nil, err 243 } 244 245 // Parse each element of the array as a VNameRewriteRule. 246 rules := Rules{} 247 for de.More() { 248 var raw json.RawMessage 249 if err := de.Decode(&raw); err != nil { 250 return nil, err 251 } 252 var pb spb.VNameRewriteRule 253 if err := protojson.Unmarshal(raw, &pb); err != nil { 254 return nil, err 255 } 256 r, err := ConvertRule(&pb) 257 if err != nil { 258 return nil, err 259 } 260 rules = append(rules, r) 261 } 262 263 // Check for end of array. 264 if err := expectDelim(de, ']'); err != nil { 265 return nil, err 266 } 267 268 // Check for EOF 269 if tok, err := de.Token(); err != io.EOF { 270 if err != nil { 271 return nil, err 272 } 273 return nil, fmt.Errorf("expected EOF; found: %s", tok) 274 } 275 276 return rules, nil 277 } 278 279 // LoadRules loads and parses the vname mapping rules in path. 280 // If path == "", this returns nil without error (no rules). 281 func LoadRules(path string) (Rules, error) { 282 if path == "" { 283 return nil, nil 284 } 285 f, err := os.Open(path) 286 if err != nil { 287 return nil, fmt.Errorf("opening vname rules file: %v", err) 288 } 289 defer f.Close() 290 return ReadRules(f) 291 }