// Source: github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/pkg/blob/ref.go

/*
Copyright 2013 Google Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package blob defines types to refer to and retrieve low-level Camlistore blobs.
package blob

import (
	"bytes"
	"crypto/sha1"
	"errors"
	"fmt"
	"hash"
	"reflect"
	"regexp"
	"strings"
)

// Pattern is the regular expression which matches a blobref:
// a lowercase digest name, a dash, and a run of lowercase hex digits.
// It does not contain ^ or $.
const Pattern = `\b([a-z][a-z0-9]*)-([a-f0-9]+)\b`

// blobRefPattern is Pattern anchored with ^ and $, so it only matches
// when the whole input is a blobref.
var blobRefPattern = regexp.MustCompile("^" + Pattern + "$")

// Ref is a reference to a Camlistore blob.
// It is used as a value type and supports equality (with ==) and the ability
// to use it as a map key.
type Ref struct {
	digest digestType // nil in the zero (invalid) Ref
}

// SizedRef is like a Ref but includes a size.
// It should also be used as a value type and supports equality.
type SizedRef struct {
	Ref
	Size uint32 // size in bytes, as printed by String
}

// String returns the ref and its size formatted as "[<ref>; <size> bytes]".
func (sr SizedRef) String() string {
	return fmt.Sprintf("[%s; %d bytes]", sr.Ref.String(), sr.Size)
}

// digestType is an interface type, but any type implementing it must
// be a comparable concrete type (such as [N]byte), so it supports
// equality with ==, which is a requirement for Ref.
59 type digestType interface { 60 bytes() []byte 61 digestName() string 62 newHash() hash.Hash 63 } 64 65 func (r Ref) String() string { 66 if r.digest == nil { 67 return "<invalid-blob.Ref>" 68 } 69 // TODO: maybe memoize this. 70 dname := r.digest.digestName() 71 bs := r.digest.bytes() 72 buf := getBuf(len(dname) + 1 + len(bs)*2)[:0] 73 defer putBuf(buf) 74 return string(r.appendString(buf)) 75 } 76 77 func (r Ref) appendString(buf []byte) []byte { 78 dname := r.digest.digestName() 79 bs := r.digest.bytes() 80 buf = append(buf, dname...) 81 buf = append(buf, '-') 82 for _, b := range bs { 83 buf = append(buf, hexDigit[b>>4], hexDigit[b&0xf]) 84 } 85 if o, ok := r.digest.(otherDigest); ok && o.odd { 86 buf = buf[:len(buf)-1] 87 } 88 return buf 89 } 90 91 // HashName returns the lowercase hash function name of the reference. 92 // It panics if r is zero. 93 func (r Ref) HashName() string { 94 if r.digest == nil { 95 panic("HashName called on invalid Ref") 96 } 97 return r.digest.digestName() 98 } 99 100 // Digest returns the lower hex digest of the blobref, without 101 // the e.g. "sha1-" prefix. It panics if r is zero. 
102 func (r Ref) Digest() string { 103 if r.digest == nil { 104 panic("Digest called on invalid Ref") 105 } 106 bs := r.digest.bytes() 107 buf := getBuf(len(bs) * 2)[:0] 108 defer putBuf(buf) 109 for _, b := range bs { 110 buf = append(buf, hexDigit[b>>4], hexDigit[b&0xf]) 111 } 112 if o, ok := r.digest.(otherDigest); ok && o.odd { 113 buf = buf[:len(buf)-1] 114 } 115 return string(buf) 116 } 117 118 func (r Ref) DigestPrefix(digits int) string { 119 v := r.Digest() 120 if len(v) < digits { 121 return v 122 } 123 return v[:digits] 124 } 125 126 func (r Ref) DomID() string { 127 if !r.Valid() { 128 return "" 129 } 130 return "camli-" + r.String() 131 } 132 133 func (r Ref) Sum32() uint32 { 134 var v uint32 135 for _, b := range r.digest.bytes()[:4] { 136 v = v<<8 | uint32(b) 137 } 138 return v 139 } 140 141 func (r Ref) Sum64() uint64 { 142 var v uint64 143 for _, b := range r.digest.bytes()[:8] { 144 v = v<<8 | uint64(b) 145 } 146 return v 147 } 148 149 // Hash returns a new hash.Hash of r's type. 150 // It panics if r is zero. 151 func (r Ref) Hash() hash.Hash { 152 return r.digest.newHash() 153 } 154 155 func (r Ref) HashMatches(h hash.Hash) bool { 156 if r.digest == nil { 157 return false 158 } 159 return bytes.Equal(h.Sum(nil), r.digest.bytes()) 160 } 161 162 const hexDigit = "0123456789abcdef" 163 164 func (r Ref) Valid() bool { return r.digest != nil } 165 166 func (r Ref) IsSupported() bool { 167 if !r.Valid() { 168 return false 169 } 170 _, ok := metaFromString[r.digest.digestName()] 171 return ok 172 } 173 174 // ParseKnown is like Parse, but only parse blobrefs known to this 175 // server. It returns ok == false for well-formed but unsupported 176 // blobrefs. 177 func ParseKnown(s string) (ref Ref, ok bool) { 178 return parse(s, false) 179 } 180 181 // Parse parse s as a blobref and returns the ref and whether it was 182 // parsed successfully. 
183 func Parse(s string) (ref Ref, ok bool) { 184 return parse(s, true) 185 } 186 187 func parse(s string, allowAll bool) (ref Ref, ok bool) { 188 i := strings.Index(s, "-") 189 if i < 0 { 190 return 191 } 192 name := s[:i] // e.g. "sha1" 193 hex := s[i+1:] 194 meta, ok := metaFromString[name] 195 if !ok { 196 if allowAll || testRefType[name] { 197 return parseUnknown(name, hex) 198 } 199 return 200 } 201 if len(hex) != meta.size*2 { 202 ok = false 203 return 204 } 205 dt, ok := meta.ctors(hex) 206 if !ok { 207 return 208 } 209 return Ref{dt}, true 210 } 211 212 var testRefType = map[string]bool{ 213 "fakeref": true, 214 "testref": true, 215 "perma": true, 216 } 217 218 // ParseBytes is like Parse, but parses from a byte slice. 219 func ParseBytes(s []byte) (ref Ref, ok bool) { 220 i := bytes.IndexByte(s, '-') 221 if i < 0 { 222 return 223 } 224 name := s[:i] // e.g. "sha1" 225 hex := s[i+1:] 226 meta, ok := metaFromBytes(name) 227 if !ok { 228 return parseUnknown(string(name), string(hex)) 229 } 230 if len(hex) != meta.size*2 { 231 ok = false 232 return 233 } 234 dt, ok := meta.ctorb(hex) 235 if !ok { 236 return 237 } 238 return Ref{dt}, true 239 } 240 241 // Parse parse s as a blobref. If s is invalid, a zero Ref is returned 242 // which can be tested with the Valid method. 243 func ParseOrZero(s string) Ref { 244 ref, ok := Parse(s) 245 if !ok { 246 return Ref{} 247 } 248 return ref 249 } 250 251 // MustParse parse s as a blobref and panics on failure. 252 func MustParse(s string) Ref { 253 ref, ok := Parse(s) 254 if !ok { 255 panic("Invalid blobref " + s) 256 } 257 return ref 258 } 259 260 // '0' => 0 ... 'f' => 15, else sets *bad to true. 
261 func hexVal(b byte, bad *bool) byte { 262 if '0' <= b && b <= '9' { 263 return b - '0' 264 } 265 if 'a' <= b && b <= 'f' { 266 return b - 'a' + 10 267 } 268 *bad = true 269 return 0 270 } 271 272 func validDigestName(name string) bool { 273 if name == "" { 274 return false 275 } 276 for _, r := range name { 277 if 'a' <= r && r <= 'z' { 278 continue 279 } 280 if '0' <= r && r <= '9' { 281 continue 282 } 283 return false 284 } 285 return true 286 } 287 288 // parseUnknown parses a blobref where the digest type isn't known to this server. 289 // e.g. ("foo-ababab") 290 func parseUnknown(digest, hex string) (ref Ref, ok bool) { 291 if !validDigestName(digest) { 292 return 293 } 294 295 // TODO: remove this short hack and don't allow odd numbers of hex digits. 296 odd := false 297 if len(hex)%2 != 0 { 298 hex += "0" 299 odd = true 300 } 301 302 if len(hex) < 2 || len(hex)%2 != 0 || len(hex) > maxOtherDigestLen*2 { 303 return 304 } 305 o := otherDigest{ 306 name: digest, 307 sumLen: len(hex) / 2, 308 odd: odd, 309 } 310 bad := false 311 for i := 0; i < len(hex); i += 2 { 312 o.sum[i/2] = hexVal(hex[i], &bad)<<4 | hexVal(hex[i+1], &bad) 313 } 314 if bad { 315 return 316 } 317 return Ref{o}, true 318 } 319 320 func sha1FromBinary(b []byte) digestType { 321 var d sha1Digest 322 if len(d) != len(b) { 323 panic("bogus sha-1 length") 324 } 325 copy(d[:], b) 326 return d 327 } 328 329 func sha1FromHexString(hex string) (digestType, bool) { 330 var d sha1Digest 331 var bad bool 332 for i := 0; i < len(hex); i += 2 { 333 d[i/2] = hexVal(hex[i], &bad)<<4 | hexVal(hex[i+1], &bad) 334 } 335 if bad { 336 return nil, false 337 } 338 return d, true 339 } 340 341 // yawn. exact copy of sha1FromHexString. 
342 func sha1FromHexBytes(hex []byte) (digestType, bool) { 343 var d sha1Digest 344 var bad bool 345 for i := 0; i < len(hex); i += 2 { 346 d[i/2] = hexVal(hex[i], &bad)<<4 | hexVal(hex[i+1], &bad) 347 } 348 if bad { 349 return nil, false 350 } 351 return d, true 352 } 353 354 // RefFromHash returns a blobref representing the given hash. 355 // It panics if the hash isn't of a known type. 356 func RefFromHash(h hash.Hash) Ref { 357 meta, ok := metaFromType[reflect.TypeOf(h)] 358 if !ok { 359 panic(fmt.Sprintf("Currently-unsupported hash type %T", h)) 360 } 361 return Ref{meta.ctor(h.Sum(nil))} 362 } 363 364 // RefFromString returns a blobref from the given string, for the currently 365 // recommended hash function 366 func RefFromString(s string) Ref { 367 return SHA1FromString(s) 368 } 369 370 // SHA1FromString returns a SHA-1 blobref of the provided string. 371 func SHA1FromString(s string) Ref { 372 s1 := sha1.New() 373 s1.Write([]byte(s)) 374 return RefFromHash(s1) 375 } 376 377 // SHA1FromBytes returns a SHA-1 blobref of the provided bytes. 
378 func SHA1FromBytes(b []byte) Ref { 379 s1 := sha1.New() 380 s1.Write(b) 381 return RefFromHash(s1) 382 } 383 384 type sha1Digest [20]byte 385 386 func (s sha1Digest) digestName() string { return "sha1" } 387 func (s sha1Digest) bytes() []byte { return s[:] } 388 func (s sha1Digest) newHash() hash.Hash { return sha1.New() } 389 390 const maxOtherDigestLen = 128 391 392 type otherDigest struct { 393 name string 394 sum [maxOtherDigestLen]byte 395 sumLen int // bytes in sum that are valid 396 odd bool // odd number of hex digits in input 397 } 398 399 func (d otherDigest) digestName() string { return d.name } 400 func (d otherDigest) bytes() []byte { return d.sum[:d.sumLen] } 401 func (d otherDigest) newHash() hash.Hash { return nil } 402 403 var sha1Meta = &digestMeta{ 404 ctor: sha1FromBinary, 405 ctors: sha1FromHexString, 406 ctorb: sha1FromHexBytes, 407 size: sha1.Size, 408 } 409 410 var metaFromString = map[string]*digestMeta{ 411 "sha1": sha1Meta, 412 } 413 414 type blobTypeAndMeta struct { 415 name []byte 416 meta *digestMeta 417 } 418 419 var metas []blobTypeAndMeta 420 421 func metaFromBytes(name []byte) (meta *digestMeta, ok bool) { 422 for _, bm := range metas { 423 if bytes.Equal(name, bm.name) { 424 return bm.meta, true 425 } 426 } 427 return 428 } 429 430 func init() { 431 for name, meta := range metaFromString { 432 metas = append(metas, blobTypeAndMeta{ 433 name: []byte(name), 434 meta: meta, 435 }) 436 } 437 } 438 439 var sha1Type = reflect.TypeOf(sha1.New()) 440 441 var metaFromType = map[reflect.Type]*digestMeta{ 442 sha1Type: sha1Meta, 443 } 444 445 type digestMeta struct { 446 ctor func(binary []byte) digestType 447 ctors func(hex string) (digestType, bool) 448 ctorb func(hex []byte) (digestType, bool) 449 size int // bytes of digest 450 } 451 452 var bufPool = make(chan []byte, 20) 453 454 func getBuf(size int) []byte { 455 for { 456 select { 457 case b := <-bufPool: 458 if cap(b) >= size { 459 return b[:size] 460 } 461 default: 462 return 
make([]byte, size) 463 } 464 } 465 } 466 467 func putBuf(b []byte) { 468 select { 469 case bufPool <- b: 470 default: 471 } 472 } 473 474 // NewHash returns a new hash.Hash of the currently recommended hash type. 475 // Currently this is just SHA-1, but will likely change within the next 476 // year or so. 477 func NewHash() hash.Hash { 478 return sha1.New() 479 } 480 481 func ValidRefString(s string) bool { 482 // TODO: optimize to not allocate 483 return ParseOrZero(s).Valid() 484 } 485 486 var null = []byte(`null`) 487 488 func (r *Ref) UnmarshalJSON(d []byte) error { 489 if r.digest != nil { 490 return errors.New("Can't UnmarshalJSON into a non-zero Ref") 491 } 492 if len(d) == 0 || bytes.Equal(d, null) { 493 return nil 494 } 495 if len(d) < 2 || d[0] != '"' || d[len(d)-1] != '"' { 496 return fmt.Errorf("blob: expecting a JSON string to unmarshal, got %q", d) 497 } 498 d = d[1 : len(d)-1] 499 p, ok := ParseBytes(d) 500 if !ok { 501 return fmt.Errorf("blobref: invalid blobref %q (%d)", d, len(d)) 502 } 503 *r = p 504 return nil 505 } 506 507 func (r Ref) MarshalJSON() ([]byte, error) { 508 if !r.Valid() { 509 return null, nil 510 } 511 dname := r.digest.digestName() 512 bs := r.digest.bytes() 513 buf := make([]byte, 0, 3+len(dname)+len(bs)*2) 514 buf = append(buf, '"') 515 buf = r.appendString(buf) 516 buf = append(buf, '"') 517 return buf, nil 518 } 519 520 // MarshalBinary implements Go's encoding.BinaryMarshaler interface. 521 func (r Ref) MarshalBinary() (data []byte, err error) { 522 dname := r.digest.digestName() 523 bs := r.digest.bytes() 524 data = make([]byte, 0, len(dname)+1+len(bs)) 525 data = append(data, dname...) 526 data = append(data, '-') 527 data = append(data, bs...) 528 return 529 } 530 531 // UnmarshalBinary implements Go's encoding.BinaryUnmarshaler interface. 
532 func (r *Ref) UnmarshalBinary(data []byte) error { 533 if r.digest != nil { 534 return errors.New("Can't UnmarshalBinary into a non-zero Ref") 535 } 536 i := bytes.IndexByte(data, '-') 537 if i < 1 { 538 return errors.New("no digest name") 539 } 540 541 digName := string(data[:i]) 542 buf := data[i+1:] 543 544 meta, ok := metaFromString[digName] 545 if !ok { 546 r2, ok := parseUnknown(digName, fmt.Sprintf("%x", buf)) 547 if !ok { 548 return errors.New("invalid blobref binary data") 549 } 550 *r = r2 551 return nil 552 } 553 if len(buf) != meta.size { 554 return errors.New("wrong size of data for digest " + digName) 555 } 556 r.digest = meta.ctor(buf) 557 return nil 558 } 559 560 // Less reports whether r sorts before o. Invalid references blobs sort first. 561 func (r Ref) Less(o Ref) bool { 562 if r.Valid() != o.Valid() { 563 return o.Valid() 564 } 565 if !r.Valid() { 566 return false 567 } 568 if n1, n2 := r.digest.digestName(), o.digest.digestName(); n1 != n2 { 569 return n1 < n2 570 } 571 return bytes.Compare(r.digest.bytes(), o.digest.bytes()) < 0 572 } 573 574 // ByRef sorts blob references. 575 type ByRef []Ref 576 577 func (s ByRef) Len() int { return len(s) } 578 func (s ByRef) Less(i, j int) bool { return s[i].Less(s[j]) } 579 func (s ByRef) Swap(i, j int) { s[i], s[j] = s[j], s[i] } 580 581 // SizedByRef sorts SizedRefs by their blobref. 582 type SizedByRef []SizedRef 583 584 func (s SizedByRef) Len() int { return len(s) } 585 func (s SizedByRef) Less(i, j int) bool { return s[i].Less(s[j].Ref) } 586 func (s SizedByRef) Swap(i, j int) { s[i], s[j] = s[j], s[i] } 587 588 // TypeAlphabet returns the valid characters in the given blobref type. 589 // It returns the empty string if the typ is unknown. 590 func TypeAlphabet(typ string) string { 591 switch typ { 592 case "sha1": 593 return hexDigit 594 } 595 return "" 596 }