kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/util/kytheuri/uri.go (about) 1 /* 2 * Copyright 2014 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Package kytheuri provides a type to represent Kythe URIs. This package 18 // supports parsing a Kythe URI from a string, and converting back and forth 19 // between a Kythe URI and a Kythe VName protobuf message. 20 package kytheuri // import "kythe.io/kythe/go/util/kytheuri" 21 22 import ( 23 "errors" 24 "fmt" 25 "path" 26 "strings" 27 28 cpb "kythe.io/kythe/proto/common_go_proto" 29 spb "kythe.io/kythe/proto/storage_go_proto" 30 ) 31 32 // Scheme is the URI scheme label for Kythe. 33 const Scheme = "kythe:" 34 35 // A URI represents a parsed, unescaped Kythe URI. A zero-valued URI is ready 36 // for use, representing the empty URI. 37 type URI struct { 38 Signature string 39 Corpus string 40 Root string 41 Path string 42 Language string 43 } 44 45 // VName converts the URI to an equivalent Kythe VName protobuf message. 46 func (u *URI) VName() *spb.VName { 47 if u == nil { 48 return new(spb.VName) 49 } 50 return &spb.VName{ 51 Signature: u.Signature, 52 Corpus: u.Corpus, 53 Root: u.Root, 54 Path: cleanPath(u.Path), 55 Language: u.Language, 56 } 57 } 58 59 // CorpusPath returns the CorpusPath components of the URI. 60 func (u *URI) CorpusPath() *cpb.CorpusPath { 61 if u == nil { 62 return new(cpb.CorpusPath) 63 } 64 return &cpb.CorpusPath{ 65 Corpus: u.Corpus, 66 Root: u.Root, 67 Path: cleanPath(u.Path), 68 } 69 } 70 71 // String renders the Kythe URI into the standard URI string format. 72 // 73 // The resulting string is in canonical ordering, so if the URI was created by 74 // parsing a string, this may return a different string from that. However, 75 // parsing this string will always give back the same URI. If u == nil, it is 76 // treated as an empty URI. 77 func (u *URI) String() string { return u.Encode().String() } 78 79 // Equal reports whether u is equal to v. 80 func (u *URI) Equal(v *URI) bool { return u.String() == v.String() } 81 82 // Encode returns an escaped "raw" Kythe URI equivalent to u. 83 func (u *URI) Encode() *Raw { 84 if u == nil { 85 return nil 86 } 87 return &Raw{ 88 URI: URI{ 89 Signature: all.escape(u.Signature), 90 Corpus: paths.escape(u.Corpus), 91 Root: paths.escape(u.Root), 92 Path: paths.escape(cleanPath(u.Path)), 93 Language: all.escape(u.Language), 94 }, 95 } 96 } 97 98 // A Raw represents a parsed, "raw" Kythe URI whose field values are escaped. 99 // Use the Decode method to convert a *Raw to a plain *URI. 100 type Raw struct{ URI URI } 101 102 // Decode returns a *URI equivalent to r but with its field values unescaped. 103 func (r *Raw) Decode() (*URI, error) { 104 u := r.URI // copy 105 buf := make([]byte, len(u.Signature)+len(u.Corpus)+len(u.Root)+len(u.Path)+len(u.Language)) 106 return decode(&u, buf) 107 } 108 109 // String renders r into the standard URI string format. 110 // 111 // The resulting string is in canonical ordering, so if the URI was created by 112 // parsing a string, this may return a different string from that. However, 113 // parsing this string will always give back the same URI. If r == nil, it is 114 // treated as an empty URI. 115 func (r *Raw) String() string { 116 if r == nil { 117 return Scheme 118 } 119 var buf strings.Builder 120 buf.Grow(len(Scheme) + 121 2 + len(r.URI.Corpus) + // "//" + corpus 122 6 + len(r.URI.Language) + // "?lang=" + string 123 6 + len(r.URI.Path) + // "?path=" + string 124 6 + len(r.URI.Root) + // "?root=" + string 125 1 + len(r.URI.Signature), // "#" + string 126 ) 127 buf.WriteString(Scheme) 128 if c := r.URI.Corpus; c != "" { 129 buf.WriteString("//") 130 buf.WriteString(c) 131 } 132 133 // Pack up the query arguments. Order matters here, so that we can preserve 134 // a canonical string format. 135 if s := r.URI.Language; s != "" { 136 buf.WriteString("?lang=") 137 buf.WriteString(s) 138 } 139 if s := r.URI.Path; s != "" { 140 buf.WriteString("?path=") 141 buf.WriteString(s) 142 } 143 if s := r.URI.Root; s != "" { 144 buf.WriteString("?root=") 145 buf.WriteString(s) 146 } 147 148 // If there is a signature, add that in as well. 149 if s := r.URI.Signature; s != "" { 150 buf.WriteByte('#') 151 buf.WriteString(s) 152 } 153 return buf.String() 154 } 155 156 // FromVName returns a Kythe URI for the given Kythe VName protobuf message. 157 func FromVName(v *spb.VName) *URI { 158 if v == nil { 159 return &URI{} 160 } 161 return &URI{ 162 Signature: v.Signature, 163 Corpus: v.Corpus, 164 Root: v.Root, 165 Path: v.Path, 166 Language: v.Language, 167 } 168 } 169 170 // FromCorpusPath returns a Kythe URI for the given Kythe CorpusPath protobuf message. 171 func FromCorpusPath(cp *cpb.CorpusPath) *URI { 172 if cp == nil { 173 return &URI{} 174 } 175 return &URI{ 176 Corpus: cp.Corpus, 177 Root: cp.Root, 178 Path: cp.Path, 179 } 180 } 181 182 // cleanPath is as path.Clean, but leaves "" alone. 183 func cleanPath(s string) string { 184 if s == "" { 185 return s 186 } 187 return path.Clean(s) 188 } 189 190 // Partition s around the first occurrence of mark, if any. 191 // If s has the form p mark q, returns p, q; otherwise returns s, "". 192 func split(s string, mark byte) (prefix, suffix string) { 193 if i := strings.IndexByte(s, mark); i >= 0 { 194 return s[:i], s[i+1:] 195 } 196 return s, "" 197 } 198 199 // ParseRaw parses a Kythe URI from s, but does not unescape its fields. Use 200 // Parse to fully parse and unescape a URI, or call the Decode method of the 201 // returned value. 202 func ParseRaw(s string) (*Raw, error) { 203 if s == "" { 204 return new(Raw), nil 205 } 206 207 // Split off the signature from the fragment tail, if defined. 208 head, fragment := split(s, '#') 209 210 // Check for a scheme label. This may be empty; but if present, it must be 211 // our expected scheme. 212 if tail := strings.TrimPrefix(head, Scheme); tail != head { 213 head = tail // found and removed our scheme marker 214 } 215 216 // Check for a bundle of attribute values. This may be empty. 217 head, attrs := split(head, '?') 218 if tail := strings.TrimPrefix(head, "//"); tail != head { 219 head = tail 220 } else if head != "" { 221 return nil, errors.New("invalid URI scheme") 222 } 223 224 r := &Raw{ 225 URI: URI{ 226 Signature: fragment, 227 Corpus: head, 228 }, 229 } 230 231 // If there are any attributes, parse them. We allow valid attributes to 232 // occur in any order, even if it is not canonical. 233 if attrs != "" { 234 if err := splitByte(attrs, '?', func(attr string) error { 235 name, value := split(attr, '=') 236 if value == "" { 237 return fmt.Errorf("invalid attribute: %q", attr) 238 } 239 switch name { 240 case "lang": 241 r.URI.Language = value 242 case "root": 243 r.URI.Root = value 244 case "path": 245 r.URI.Path = value 246 default: 247 return fmt.Errorf("invalid attribute: %q", name) 248 } 249 return nil 250 }); err != nil { 251 return nil, err 252 } 253 } 254 return r, nil 255 } 256 257 // splitByte calls f with each partition of s delimited by b or the end of the 258 // string. If f reports an error, the split is aborted and that error is 259 // returned to the caller of splitByte. 260 func splitByte(s string, b byte, f func(string) error) error { 261 pos := 0 262 for pos < len(s) { 263 tail := s[pos:] 264 i := strings.IndexByte(tail, b) 265 if i < 0 { 266 return f(tail) 267 } else if err := f(tail[:i]); err != nil { 268 return err 269 } 270 pos += i + 1 271 } 272 return nil 273 } 274 275 // Parse parses and unescapes a Kythe URI from s. If s omits a scheme label, 276 // the "kythe" scheme is assumed. 277 func Parse(s string) (*URI, error) { 278 r, err := ParseRaw(s) 279 if err != nil { 280 return nil, err 281 } 282 return decode(&r.URI, make([]byte, len(s))) 283 } 284 285 // ParseCorpusPath parses a Kythe URI and returns its CorpusPath components. 286 func ParseCorpusPath(s string) (*cpb.CorpusPath, error) { 287 u, err := Parse(s) 288 if err != nil { 289 return nil, err 290 } 291 return u.CorpusPath(), nil 292 } 293 294 // decode decodes u in-place using buf as an intermediate buffer. The caller 295 // must ensure len(buf) is sufficient to hold the longest field. Preallocation 296 // reduces allocation for unescaping and saves ~200 ns/op in benchmarks. 297 func decode(u *URI, buf []byte) (*URI, error) { 298 if err := unescape(&u.Signature, buf); err != nil { 299 return nil, fmt.Errorf("invalid signature: %v", err) 300 } else if err := unescape(&u.Corpus, buf); err != nil { 301 return nil, fmt.Errorf("invalid corpus label: %v", err) 302 } else if err := unescape(&u.Language, buf); err != nil { 303 return nil, fmt.Errorf("invalid language: %v", err) 304 } else if err := unescape(&u.Path, buf); err != nil { 305 return nil, fmt.Errorf("invalid path: %v", err) 306 } else if err := unescape(&u.Root, buf); err != nil { 307 return nil, fmt.Errorf("invalid root: %v", err) 308 } 309 return u, nil 310 } 311 312 // ToString renders the given VName into the standard string uri format. 313 func ToString(v *spb.VName) string { return FromVName(v).String() } 314 315 // ToVName parses the given string as a URI and returns an equivalent VName. 316 func ToVName(s string) (*spb.VName, error) { 317 uri, err := Parse(s) 318 if err != nil { 319 return nil, err 320 } 321 return uri.VName(), nil 322 } 323 324 // MustParse returns the URI from parsing s, or panics in case of error. 325 func MustParse(s string) *URI { 326 u, err := Parse(s) 327 if err != nil { 328 panic(fmt.Sprintf("Parse %q: %v", s, err)) 329 } 330 return u 331 } 332 333 // Fix returns the canonical form of the given Kythe URI, if possible. 334 func Fix(s string) (string, error) { 335 u, err := Parse(s) 336 if err != nil { 337 return "", err 338 } 339 return u.String(), nil 340 } 341 342 // Equal reports whether the two Kythe URI strings are equal in canonical form. 343 // If either URI is invalid, Equal returns false. 344 func Equal(u1, u2 string) bool { 345 f1, err := Fix(u1) 346 if err != nil { 347 return false 348 } 349 f2, err := Fix(u2) 350 if err != nil { 351 return false 352 } 353 return f1 == f2 354 }