github.com/olivere/camlistore@v0.0.0-20140121221811-1b7ac2da0199/pkg/blob/ref.go (about) 1 /* 2 Copyright 2013 Google Inc. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 // Package blob defines types to refer to and retrieve low-level Camlistore blobs. 18 package blob 19 20 import ( 21 "bytes" 22 "crypto/sha1" 23 "errors" 24 "fmt" 25 "hash" 26 "io" 27 "reflect" 28 "regexp" 29 "strings" 30 ) 31 32 // Pattern is the regular expression which matches a blobref. 33 // It does not contain ^ or $. 34 const Pattern = `\b([a-z][a-z0-9]*)-([a-f0-9]+)\b` 35 36 // whole blobref pattern 37 var blobRefPattern = regexp.MustCompile("^" + Pattern + "$") 38 39 // Ref is a reference to a Camlistore blob. 40 // It is used as a value type and supports equality (with ==) and the ability 41 // to use it as a map key. 42 type Ref struct { 43 digest digestType 44 } 45 46 // SizedRef is like a Ref but includes a size. 47 // It should also be used as a value type and supports equality. 48 type SizedRef struct { 49 Ref 50 Size int64 51 } 52 53 func (sr SizedRef) String() string { 54 return fmt.Sprintf("[%s; %d bytes]", sr.Ref.String(), sr.Size) 55 } 56 57 // digestType is an interface type, but any type implementing it must 58 // be of concrete type [N]byte, so it supports equality with ==, 59 // which is a requirement for ref. 60 type digestType interface { 61 bytes() []byte 62 digestName() string 63 newHash() hash.Hash 64 } 65 66 func (r Ref) String() string { 67 if r.digest == nil { 68 return "<invalid-blob.Ref>" 69 } 70 // TODO: maybe memoize this. 71 dname := r.digest.digestName() 72 bs := r.digest.bytes() 73 buf := getBuf(len(dname) + 1 + len(bs)*2)[:0] 74 defer putBuf(buf) 75 return string(r.appendString(buf)) 76 } 77 78 func (r Ref) appendString(buf []byte) []byte { 79 dname := r.digest.digestName() 80 bs := r.digest.bytes() 81 buf = append(buf, dname...) 82 buf = append(buf, '-') 83 for _, b := range bs { 84 buf = append(buf, hexDigit[b>>4], hexDigit[b&0xf]) 85 } 86 if o, ok := r.digest.(otherDigest); ok && o.odd { 87 buf = buf[:len(buf)-1] 88 } 89 return buf 90 } 91 92 // HashName returns the lowercase hash function name of the reference. 93 // It panics if r is zero. 94 func (r Ref) HashName() string { 95 if r.digest == nil { 96 panic("HashName called on invalid Ref") 97 } 98 return r.digest.digestName() 99 } 100 101 // Digest returns the lower hex digest of the blobref, without 102 // the e.g. "sha1-" prefix. It panics if r is zero. 103 func (r Ref) Digest() string { 104 if r.digest == nil { 105 panic("Digest called on invalid Ref") 106 } 107 bs := r.digest.bytes() 108 buf := getBuf(len(bs) * 2)[:0] 109 defer putBuf(buf) 110 for _, b := range bs { 111 buf = append(buf, hexDigit[b>>4], hexDigit[b&0xf]) 112 } 113 if o, ok := r.digest.(otherDigest); ok && o.odd { 114 buf = buf[:len(buf)-1] 115 } 116 return string(buf) 117 } 118 119 func (r Ref) DigestPrefix(digits int) string { 120 v := r.Digest() 121 if len(v) < digits { 122 return v 123 } 124 return v[:digits] 125 } 126 127 func (r Ref) DomID() string { 128 if !r.Valid() { 129 return "" 130 } 131 return "camli-" + r.String() 132 } 133 134 func (r Ref) Sum32() uint32 { 135 var v uint32 136 for _, b := range r.digest.bytes()[:4] { 137 v = v<<8 | uint32(b) 138 } 139 return v 140 } 141 142 func (r Ref) Sum64() uint64 { 143 var v uint64 144 for _, b := range r.digest.bytes()[:8] { 145 v = v<<8 | uint64(b) 146 } 147 return v 148 } 149 150 // Hash returns a new hash.Hash of r's type. 151 // It panics if r is zero. 152 func (r Ref) Hash() hash.Hash { 153 return r.digest.newHash() 154 } 155 156 func (r Ref) HashMatches(h hash.Hash) bool { 157 if r.digest == nil { 158 return false 159 } 160 return bytes.Equal(h.Sum(nil), r.digest.bytes()) 161 } 162 163 const hexDigit = "0123456789abcdef" 164 165 func (r Ref) Valid() bool { return r.digest != nil } 166 167 func (r Ref) IsSupported() bool { 168 if !r.Valid() { 169 return false 170 } 171 _, ok := metaFromString[r.digest.digestName()] 172 return ok 173 } 174 175 // Parse parse s as a blobref and returns the ref and whether it was 176 // parsed successfully. 177 func Parse(s string) (ref Ref, ok bool) { 178 i := strings.Index(s, "-") 179 if i < 0 { 180 return 181 } 182 name := s[:i] // e.g. "sha1" 183 hex := s[i+1:] 184 meta, ok := metaFromString[name] 185 if !ok { 186 return parseUnknown(name, hex) 187 } 188 if len(hex) != meta.size*2 { 189 ok = false 190 return 191 } 192 dt, ok := meta.ctors(hex) 193 if !ok { 194 return 195 } 196 return Ref{dt}, true 197 } 198 199 // ParseBytes is like Parse, but parses from a byte slice. 200 func ParseBytes(s []byte) (ref Ref, ok bool) { 201 i := bytes.IndexByte(s, '-') 202 if i < 0 { 203 return 204 } 205 name := s[:i] // e.g. "sha1" 206 hex := s[i+1:] 207 meta, ok := metaFromBytes(name) 208 if !ok { 209 return parseUnknown(string(name), string(hex)) 210 } 211 if len(hex) != meta.size*2 { 212 ok = false 213 return 214 } 215 dt, ok := meta.ctorb(hex) 216 if !ok { 217 return 218 } 219 return Ref{dt}, true 220 } 221 222 // Parse parse s as a blobref. If s is invalid, a zero Ref is returned 223 // which can be tested with the Valid method. 224 func ParseOrZero(s string) Ref { 225 ref, ok := Parse(s) 226 if !ok { 227 return Ref{} 228 } 229 return ref 230 } 231 232 // MustParse parse s as a blobref and panics on failure. 233 func MustParse(s string) Ref { 234 ref, ok := Parse(s) 235 if !ok { 236 panic("Invalid blobref " + s) 237 } 238 return ref 239 } 240 241 // '0' => 0 ... 'f' => 15, else sets *bad to true. 242 func hexVal(b byte, bad *bool) byte { 243 if '0' <= b && b <= '9' { 244 return b - '0' 245 } 246 if 'a' <= b && b <= 'f' { 247 return b - 'a' + 10 248 } 249 *bad = true 250 return 0 251 } 252 253 func validDigestName(name string) bool { 254 if name == "" { 255 return false 256 } 257 for _, r := range name { 258 if 'a' <= r && r <= 'z' { 259 continue 260 } 261 if '0' <= r && r <= '9' { 262 continue 263 } 264 return false 265 } 266 return true 267 } 268 269 // parseUnknown parses a blobref where the digest type isn't known to this server. 270 // e.g. ("foo-ababab") 271 func parseUnknown(digest, hex string) (ref Ref, ok bool) { 272 if !validDigestName(digest) { 273 return 274 } 275 276 // TODO: remove this short hack and don't allow odd numbers of hex digits. 277 odd := false 278 if len(hex)%2 != 0 { 279 hex += "0" 280 odd = true 281 } 282 283 if len(hex) < 2 || len(hex)%2 != 0 || len(hex) > maxOtherDigestLen*2 { 284 return 285 } 286 o := otherDigest{ 287 name: digest, 288 sumLen: len(hex) / 2, 289 odd: odd, 290 } 291 bad := false 292 for i := 0; i < len(hex); i += 2 { 293 o.sum[i/2] = hexVal(hex[i], &bad)<<4 | hexVal(hex[i+1], &bad) 294 } 295 if bad { 296 return 297 } 298 return Ref{o}, true 299 } 300 301 func sha1FromBinary(b []byte) digestType { 302 var d sha1Digest 303 if len(d) != len(b) { 304 panic("bogus sha-1 length") 305 } 306 copy(d[:], b) 307 return d 308 } 309 310 func sha1FromHexString(hex string) (digestType, bool) { 311 var d sha1Digest 312 var bad bool 313 for i := 0; i < len(hex); i += 2 { 314 d[i/2] = hexVal(hex[i], &bad)<<4 | hexVal(hex[i+1], &bad) 315 } 316 if bad { 317 return nil, false 318 } 319 return d, true 320 } 321 322 // yawn. exact copy of sha1FromHexString. 323 func sha1FromHexBytes(hex []byte) (digestType, bool) { 324 var d sha1Digest 325 var bad bool 326 for i := 0; i < len(hex); i += 2 { 327 d[i/2] = hexVal(hex[i], &bad)<<4 | hexVal(hex[i+1], &bad) 328 } 329 if bad { 330 return nil, false 331 } 332 return d, true 333 } 334 335 // RefFromHash returns a blobref representing the given hash. 336 // It panics if the hash isn't of a known type. 337 func RefFromHash(h hash.Hash) Ref { 338 meta, ok := metaFromType[reflect.TypeOf(h)] 339 if !ok { 340 panic(fmt.Sprintf("Currently-unsupported hash type %T", h)) 341 } 342 return Ref{meta.ctor(h.Sum(nil))} 343 } 344 345 // RefFromString returns a blobref from the given string, for the currently 346 // recommended hash function 347 func RefFromString(s string) Ref { 348 return SHA1FromString(s) 349 } 350 351 // SHA1FromString returns a SHA-1 blobref of the provided string. 352 func SHA1FromString(s string) Ref { 353 s1 := sha1.New() 354 s1.Write([]byte(s)) 355 return RefFromHash(s1) 356 } 357 358 // SHA1FromBytes returns a SHA-1 blobref of the provided bytes. 359 func SHA1FromBytes(b []byte) Ref { 360 s1 := sha1.New() 361 s1.Write(b) 362 return RefFromHash(s1) 363 } 364 365 type sha1Digest [20]byte 366 367 func (s sha1Digest) digestName() string { return "sha1" } 368 func (s sha1Digest) bytes() []byte { return s[:] } 369 func (s sha1Digest) newHash() hash.Hash { return sha1.New() } 370 371 const maxOtherDigestLen = 128 372 373 type otherDigest struct { 374 name string 375 sum [maxOtherDigestLen]byte 376 sumLen int // bytes in sum that are valid 377 odd bool // odd number of hex digits in input 378 } 379 380 func (d otherDigest) digestName() string { return d.name } 381 func (d otherDigest) bytes() []byte { return d.sum[:d.sumLen] } 382 func (d otherDigest) newHash() hash.Hash { return nil } 383 384 var sha1Meta = &digestMeta{ 385 ctor: sha1FromBinary, 386 ctors: sha1FromHexString, 387 ctorb: sha1FromHexBytes, 388 size: sha1.Size, 389 } 390 391 var metaFromString = map[string]*digestMeta{ 392 "sha1": sha1Meta, 393 } 394 395 type blobTypeAndMeta struct { 396 name []byte 397 meta *digestMeta 398 } 399 400 var metas []blobTypeAndMeta 401 402 func metaFromBytes(name []byte) (meta *digestMeta, ok bool) { 403 for _, bm := range metas { 404 if bytes.Equal(name, bm.name) { 405 return bm.meta, true 406 } 407 } 408 return 409 } 410 411 func init() { 412 for name, meta := range metaFromString { 413 metas = append(metas, blobTypeAndMeta{ 414 name: []byte(name), 415 meta: meta, 416 }) 417 } 418 } 419 420 var sha1Type = reflect.TypeOf(sha1.New()) 421 422 var metaFromType = map[reflect.Type]*digestMeta{ 423 sha1Type: sha1Meta, 424 } 425 426 type digestMeta struct { 427 ctor func(binary []byte) digestType 428 ctors func(hex string) (digestType, bool) 429 ctorb func(hex []byte) (digestType, bool) 430 size int // bytes of digest 431 } 432 433 var bufPool = make(chan []byte, 20) 434 435 func getBuf(size int) []byte { 436 for { 437 select { 438 case b := <-bufPool: 439 if cap(b) >= size { 440 return b[:size] 441 } 442 default: 443 return make([]byte, size) 444 } 445 } 446 } 447 448 func putBuf(b []byte) { 449 select { 450 case bufPool <- b: 451 default: 452 } 453 } 454 455 // NewHash returns a new hash.Hash of the currently recommended hash type. 456 // Currently this is just SHA-1, but will likely change within the next 457 // year or so. 458 func NewHash() hash.Hash { 459 return sha1.New() 460 } 461 462 func ValidRefString(s string) bool { 463 // TODO: optimize to not allocate 464 return ParseOrZero(s).Valid() 465 } 466 467 var null = []byte(`null`) 468 469 func (r *Ref) UnmarshalJSON(d []byte) error { 470 if r.digest != nil { 471 return errors.New("Can't UnmarshalJSON into a non-zero Ref") 472 } 473 if len(d) == 0 || bytes.Equal(d, null) { 474 return nil 475 } 476 if len(d) < 2 || d[0] != '"' || d[len(d)-1] != '"' { 477 return fmt.Errorf("blob: expecting a JSON string to unmarshal, got %q", d) 478 } 479 d = d[1 : len(d)-1] 480 p, ok := ParseBytes(d) 481 if !ok { 482 return fmt.Errorf("blobref: invalid blobref %q (%d)", d, len(d)) 483 } 484 *r = p 485 return nil 486 } 487 488 func (r Ref) MarshalJSON() ([]byte, error) { 489 if !r.Valid() { 490 return null, nil 491 } 492 dname := r.digest.digestName() 493 bs := r.digest.bytes() 494 buf := make([]byte, 0, 3+len(dname)+len(bs)*2) 495 buf = append(buf, '"') 496 buf = r.appendString(buf) 497 buf = append(buf, '"') 498 return buf, nil 499 } 500 501 // MarshalBinary implements Go's encoding.BinaryMarshaler interface. 502 func (r Ref) MarshalBinary() (data []byte, err error) { 503 dname := r.digest.digestName() 504 bs := r.digest.bytes() 505 data = make([]byte, 0, len(dname)+1+len(bs)) 506 data = append(data, dname...) 507 data = append(data, '-') 508 data = append(data, bs...) 509 return 510 } 511 512 // UnmarshalBinary implements Go's encoding.BinaryUnmarshaler interface. 513 func (r *Ref) UnmarshalBinary(data []byte) error { 514 if r.digest != nil { 515 return errors.New("Can't UnmarshalBinary into a non-zero Ref") 516 } 517 i := bytes.IndexByte(data, '-') 518 if i < 1 { 519 return errors.New("no digest name") 520 } 521 522 digName := string(data[:i]) 523 buf := data[i+1:] 524 525 meta, ok := metaFromString[digName] 526 if !ok { 527 r2, ok := parseUnknown(digName, fmt.Sprintf("%x", buf)) 528 if !ok { 529 return errors.New("invalid blobref binary data") 530 } 531 *r = r2 532 return nil 533 } 534 if len(buf) != meta.size { 535 return errors.New("wrong size of data for digest " + digName) 536 } 537 r.digest = meta.ctor(buf) 538 return nil 539 } 540 541 // Less reports whether r sorts before o. Invalid references blobs sort first. 542 func (r Ref) Less(o Ref) bool { 543 if r.Valid() != o.Valid() { 544 return o.Valid() 545 } 546 if !r.Valid() { 547 return false 548 } 549 if n1, n2 := r.digest.digestName(), o.digest.digestName(); n1 != n2 { 550 return n1 < n2 551 } 552 return bytes.Compare(r.digest.bytes(), o.digest.bytes()) < 0 553 } 554 555 // ByRef sorts blob references. 556 type ByRef []Ref 557 558 func (s ByRef) Len() int { return len(s) } 559 func (s ByRef) Less(i, j int) bool { return s[i].Less(s[j]) } 560 func (s ByRef) Swap(i, j int) { s[i], s[j] = s[j], s[i] } 561 562 // SizedByRef sorts SizedRefs by their blobref. 563 type SizedByRef []SizedRef 564 565 func (s SizedByRef) Len() int { return len(s) } 566 func (s SizedByRef) Less(i, j int) bool { return s[i].Less(s[j].Ref) } 567 func (s SizedByRef) Swap(i, j int) { s[i], s[j] = s[j], s[i] } 568 569 // Blob represents a blob. Use the methods Size, SizedRef and 570 // Open to query and get data from Blob. 571 type Blob struct { 572 ref Ref 573 size uint32 574 newReader func() io.ReadCloser 575 } 576 577 // NewBlob constructs a Blob from its Ref, size and a function that 578 // returns an io.ReadCloser from which the blob can be read. Any error 579 // in the function newReader when constructing the io.ReadCloser should 580 // be returned upon the first call to Read or Close. 581 func NewBlob(ref Ref, size uint32, newReader func() io.ReadCloser) Blob { 582 return Blob{ref, size, newReader} 583 } 584 585 // Size returns the size of the blob (in bytes). 586 func (b Blob) Size() uint32 { 587 return b.size 588 } 589 590 // SizedRef returns the SizedRef corresponding to the blob. 591 func (b Blob) SizedRef() SizedRef { 592 return SizedRef{b.ref, int64(b.size)} 593 } 594 595 // Open returns an io.ReadCloser that can be used to read the blob 596 // data. The caller must close the io.ReadCloser when finished. 597 func (b Blob) Open() io.ReadCloser { 598 return b.newReader() 599 }