github.com/olivere/camlistore@v0.0.0-20140121221811-1b7ac2da0199/pkg/blob/ref.go (about)

     1  /*
     2  Copyright 2013 Google Inc.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8       http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  // Package blob defines types to refer to and retrieve low-level Camlistore blobs.
    18  package blob
    19  
    20  import (
    21  	"bytes"
    22  	"crypto/sha1"
    23  	"errors"
    24  	"fmt"
    25  	"hash"
    26  	"io"
    27  	"reflect"
    28  	"regexp"
    29  	"strings"
    30  )
    31  
// Pattern is the regular expression which matches a blobref.
// It does not contain ^ or $.
//
// The first capture group is the digest name (a lowercase letter followed
// by lowercase letters or digits) and the second is the lowercase hex digest.
const Pattern = `\b([a-z][a-z0-9]*)-([a-f0-9]+)\b`

// blobRefPattern is Pattern anchored with ^ and $, so it only matches when
// the entire input is a blobref.
var blobRefPattern = regexp.MustCompile("^" + Pattern + "$")
    38  
// Ref is a reference to a Camlistore blob.
// It is used as a value type and supports equality (with ==) and the ability
// to use it as a map key.
//
// The zero Ref has a nil digest and is reported as invalid by Valid.
type Ref struct {
	digest digestType
}
    45  
// SizedRef is like a Ref but includes a size.
// It should also be used as a value type and supports equality.
// Ref is embedded, so Ref's methods are available on a SizedRef.
type SizedRef struct {
	Ref
	Size int64
}
    52  
    53  func (sr SizedRef) String() string {
    54  	return fmt.Sprintf("[%s; %d bytes]", sr.Ref.String(), sr.Size)
    55  }
    56  
// digestType is an interface type, but any type implementing it must
// be of concrete type [N]byte, so it supports equality with ==,
// which is a requirement for ref.
type digestType interface {
	// bytes returns the raw (binary, not hex) digest bytes.
	bytes() []byte
	// digestName returns the hash function name, e.g. "sha1".
	digestName() string
	// newHash returns a new hash.Hash for this digest type.
	newHash() hash.Hash
}
    65  
    66  func (r Ref) String() string {
    67  	if r.digest == nil {
    68  		return "<invalid-blob.Ref>"
    69  	}
    70  	// TODO: maybe memoize this.
    71  	dname := r.digest.digestName()
    72  	bs := r.digest.bytes()
    73  	buf := getBuf(len(dname) + 1 + len(bs)*2)[:0]
    74  	defer putBuf(buf)
    75  	return string(r.appendString(buf))
    76  }
    77  
    78  func (r Ref) appendString(buf []byte) []byte {
    79  	dname := r.digest.digestName()
    80  	bs := r.digest.bytes()
    81  	buf = append(buf, dname...)
    82  	buf = append(buf, '-')
    83  	for _, b := range bs {
    84  		buf = append(buf, hexDigit[b>>4], hexDigit[b&0xf])
    85  	}
    86  	if o, ok := r.digest.(otherDigest); ok && o.odd {
    87  		buf = buf[:len(buf)-1]
    88  	}
    89  	return buf
    90  }
    91  
    92  // HashName returns the lowercase hash function name of the reference.
    93  // It panics if r is zero.
    94  func (r Ref) HashName() string {
    95  	if r.digest == nil {
    96  		panic("HashName called on invalid Ref")
    97  	}
    98  	return r.digest.digestName()
    99  }
   100  
   101  // Digest returns the lower hex digest of the blobref, without
   102  // the e.g. "sha1-" prefix. It panics if r is zero.
   103  func (r Ref) Digest() string {
   104  	if r.digest == nil {
   105  		panic("Digest called on invalid Ref")
   106  	}
   107  	bs := r.digest.bytes()
   108  	buf := getBuf(len(bs) * 2)[:0]
   109  	defer putBuf(buf)
   110  	for _, b := range bs {
   111  		buf = append(buf, hexDigit[b>>4], hexDigit[b&0xf])
   112  	}
   113  	if o, ok := r.digest.(otherDigest); ok && o.odd {
   114  		buf = buf[:len(buf)-1]
   115  	}
   116  	return string(buf)
   117  }
   118  
   119  func (r Ref) DigestPrefix(digits int) string {
   120  	v := r.Digest()
   121  	if len(v) < digits {
   122  		return v
   123  	}
   124  	return v[:digits]
   125  }
   126  
   127  func (r Ref) DomID() string {
   128  	if !r.Valid() {
   129  		return ""
   130  	}
   131  	return "camli-" + r.String()
   132  }
   133  
   134  func (r Ref) Sum32() uint32 {
   135  	var v uint32
   136  	for _, b := range r.digest.bytes()[:4] {
   137  		v = v<<8 | uint32(b)
   138  	}
   139  	return v
   140  }
   141  
   142  func (r Ref) Sum64() uint64 {
   143  	var v uint64
   144  	for _, b := range r.digest.bytes()[:8] {
   145  		v = v<<8 | uint64(b)
   146  	}
   147  	return v
   148  }
   149  
   150  // Hash returns a new hash.Hash of r's type.
   151  // It panics if r is zero.
   152  func (r Ref) Hash() hash.Hash {
   153  	return r.digest.newHash()
   154  }
   155  
   156  func (r Ref) HashMatches(h hash.Hash) bool {
   157  	if r.digest == nil {
   158  		return false
   159  	}
   160  	return bytes.Equal(h.Sum(nil), r.digest.bytes())
   161  }
   162  
// hexDigit maps a nibble value (0-15), used as an index, to its lowercase
// hex character.
const hexDigit = "0123456789abcdef"
   164  
   165  func (r Ref) Valid() bool { return r.digest != nil }
   166  
   167  func (r Ref) IsSupported() bool {
   168  	if !r.Valid() {
   169  		return false
   170  	}
   171  	_, ok := metaFromString[r.digest.digestName()]
   172  	return ok
   173  }
   174  
   175  // Parse parse s as a blobref and returns the ref and whether it was
   176  // parsed successfully.
   177  func Parse(s string) (ref Ref, ok bool) {
   178  	i := strings.Index(s, "-")
   179  	if i < 0 {
   180  		return
   181  	}
   182  	name := s[:i] // e.g. "sha1"
   183  	hex := s[i+1:]
   184  	meta, ok := metaFromString[name]
   185  	if !ok {
   186  		return parseUnknown(name, hex)
   187  	}
   188  	if len(hex) != meta.size*2 {
   189  		ok = false
   190  		return
   191  	}
   192  	dt, ok := meta.ctors(hex)
   193  	if !ok {
   194  		return
   195  	}
   196  	return Ref{dt}, true
   197  }
   198  
   199  // ParseBytes is like Parse, but parses from a byte slice.
   200  func ParseBytes(s []byte) (ref Ref, ok bool) {
   201  	i := bytes.IndexByte(s, '-')
   202  	if i < 0 {
   203  		return
   204  	}
   205  	name := s[:i] // e.g. "sha1"
   206  	hex := s[i+1:]
   207  	meta, ok := metaFromBytes(name)
   208  	if !ok {
   209  		return parseUnknown(string(name), string(hex))
   210  	}
   211  	if len(hex) != meta.size*2 {
   212  		ok = false
   213  		return
   214  	}
   215  	dt, ok := meta.ctorb(hex)
   216  	if !ok {
   217  		return
   218  	}
   219  	return Ref{dt}, true
   220  }
   221  
   222  // Parse parse s as a blobref. If s is invalid, a zero Ref is returned
   223  // which can be tested with the Valid method.
   224  func ParseOrZero(s string) Ref {
   225  	ref, ok := Parse(s)
   226  	if !ok {
   227  		return Ref{}
   228  	}
   229  	return ref
   230  }
   231  
   232  // MustParse parse s as a blobref and panics on failure.
   233  func MustParse(s string) Ref {
   234  	ref, ok := Parse(s)
   235  	if !ok {
   236  		panic("Invalid blobref " + s)
   237  	}
   238  	return ref
   239  }
   240  
   241  // '0' => 0 ... 'f' => 15, else sets *bad to true.
   242  func hexVal(b byte, bad *bool) byte {
   243  	if '0' <= b && b <= '9' {
   244  		return b - '0'
   245  	}
   246  	if 'a' <= b && b <= 'f' {
   247  		return b - 'a' + 10
   248  	}
   249  	*bad = true
   250  	return 0
   251  }
   252  
   253  func validDigestName(name string) bool {
   254  	if name == "" {
   255  		return false
   256  	}
   257  	for _, r := range name {
   258  		if 'a' <= r && r <= 'z' {
   259  			continue
   260  		}
   261  		if '0' <= r && r <= '9' {
   262  			continue
   263  		}
   264  		return false
   265  	}
   266  	return true
   267  }
   268  
   269  // parseUnknown parses a blobref where the digest type isn't known to this server.
   270  // e.g. ("foo-ababab")
   271  func parseUnknown(digest, hex string) (ref Ref, ok bool) {
   272  	if !validDigestName(digest) {
   273  		return
   274  	}
   275  
   276  	// TODO: remove this short hack and don't allow odd numbers of hex digits.
   277  	odd := false
   278  	if len(hex)%2 != 0 {
   279  		hex += "0"
   280  		odd = true
   281  	}
   282  
   283  	if len(hex) < 2 || len(hex)%2 != 0 || len(hex) > maxOtherDigestLen*2 {
   284  		return
   285  	}
   286  	o := otherDigest{
   287  		name:   digest,
   288  		sumLen: len(hex) / 2,
   289  		odd:    odd,
   290  	}
   291  	bad := false
   292  	for i := 0; i < len(hex); i += 2 {
   293  		o.sum[i/2] = hexVal(hex[i], &bad)<<4 | hexVal(hex[i+1], &bad)
   294  	}
   295  	if bad {
   296  		return
   297  	}
   298  	return Ref{o}, true
   299  }
   300  
   301  func sha1FromBinary(b []byte) digestType {
   302  	var d sha1Digest
   303  	if len(d) != len(b) {
   304  		panic("bogus sha-1 length")
   305  	}
   306  	copy(d[:], b)
   307  	return d
   308  }
   309  
   310  func sha1FromHexString(hex string) (digestType, bool) {
   311  	var d sha1Digest
   312  	var bad bool
   313  	for i := 0; i < len(hex); i += 2 {
   314  		d[i/2] = hexVal(hex[i], &bad)<<4 | hexVal(hex[i+1], &bad)
   315  	}
   316  	if bad {
   317  		return nil, false
   318  	}
   319  	return d, true
   320  }
   321  
   322  // yawn. exact copy of sha1FromHexString.
   323  func sha1FromHexBytes(hex []byte) (digestType, bool) {
   324  	var d sha1Digest
   325  	var bad bool
   326  	for i := 0; i < len(hex); i += 2 {
   327  		d[i/2] = hexVal(hex[i], &bad)<<4 | hexVal(hex[i+1], &bad)
   328  	}
   329  	if bad {
   330  		return nil, false
   331  	}
   332  	return d, true
   333  }
   334  
   335  // RefFromHash returns a blobref representing the given hash.
   336  // It panics if the hash isn't of a known type.
   337  func RefFromHash(h hash.Hash) Ref {
   338  	meta, ok := metaFromType[reflect.TypeOf(h)]
   339  	if !ok {
   340  		panic(fmt.Sprintf("Currently-unsupported hash type %T", h))
   341  	}
   342  	return Ref{meta.ctor(h.Sum(nil))}
   343  }
   344  
   345  // RefFromString returns a blobref from the given string, for the currently
   346  // recommended hash function
   347  func RefFromString(s string) Ref {
   348  	return SHA1FromString(s)
   349  }
   350  
   351  // SHA1FromString returns a SHA-1 blobref of the provided string.
   352  func SHA1FromString(s string) Ref {
   353  	s1 := sha1.New()
   354  	s1.Write([]byte(s))
   355  	return RefFromHash(s1)
   356  }
   357  
   358  // SHA1FromBytes returns a SHA-1 blobref of the provided bytes.
   359  func SHA1FromBytes(b []byte) Ref {
   360  	s1 := sha1.New()
   361  	s1.Write(b)
   362  	return RefFromHash(s1)
   363  }
   364  
   365  type sha1Digest [20]byte
   366  
   367  func (s sha1Digest) digestName() string { return "sha1" }
   368  func (s sha1Digest) bytes() []byte      { return s[:] }
   369  func (s sha1Digest) newHash() hash.Hash { return sha1.New() }
   370  
   371  const maxOtherDigestLen = 128
   372  
   373  type otherDigest struct {
   374  	name   string
   375  	sum    [maxOtherDigestLen]byte
   376  	sumLen int  // bytes in sum that are valid
   377  	odd    bool // odd number of hex digits in input
   378  }
   379  
   380  func (d otherDigest) digestName() string { return d.name }
   381  func (d otherDigest) bytes() []byte      { return d.sum[:d.sumLen] }
   382  func (d otherDigest) newHash() hash.Hash { return nil }
   383  
// sha1Meta describes the SHA-1 digest type: its constructors from binary,
// hex string and hex bytes, and its digest size in bytes.
var sha1Meta = &digestMeta{
	ctor:  sha1FromBinary,
	ctors: sha1FromHexString,
	ctorb: sha1FromHexBytes,
	size:  sha1.Size,
}
   390  
// metaFromString maps a digest name (the part of a blobref before the
// dash) to the metadata of that known digest type.
var metaFromString = map[string]*digestMeta{
	"sha1": sha1Meta,
}
   394  
// blobTypeAndMeta pairs a digest name, held as bytes, with its metadata.
type blobTypeAndMeta struct {
	name []byte
	meta *digestMeta
}

// metas holds one entry per metaFromString entry; it is filled in by init
// and scanned by metaFromBytes.
// NOTE(review): presumably this exists so a []byte name can be looked up
// without first converting it to a string — confirm.
var metas []blobTypeAndMeta
   401  
   402  func metaFromBytes(name []byte) (meta *digestMeta, ok bool) {
   403  	for _, bm := range metas {
   404  		if bytes.Equal(name, bm.name) {
   405  			return bm.meta, true
   406  		}
   407  	}
   408  	return
   409  }
   410  
   411  func init() {
   412  	for name, meta := range metaFromString {
   413  		metas = append(metas, blobTypeAndMeta{
   414  			name: []byte(name),
   415  			meta: meta,
   416  		})
   417  	}
   418  }
   419  
// sha1Type is the dynamic type of the hash.Hash returned by sha1.New.
var sha1Type = reflect.TypeOf(sha1.New())

// metaFromType maps the dynamic type of a hash.Hash to the metadata of the
// digest type it produces. RefFromHash uses it to recognize known hashes.
var metaFromType = map[reflect.Type]*digestMeta{
	sha1Type: sha1Meta,
}
   425  
// digestMeta describes a known digest type: how to construct its
// digestType value from different representations, and how large it is.
type digestMeta struct {
	// ctor builds a digest from its raw binary bytes.
	ctor func(binary []byte) digestType
	// ctors builds a digest from a hex string; the bool reports success.
	ctors func(hex string) (digestType, bool)
	// ctorb builds a digest from hex bytes; the bool reports success.
	ctorb func(hex []byte) (digestType, bool)
	size  int // bytes of digest
}
   432  
   433  var bufPool = make(chan []byte, 20)
   434  
   435  func getBuf(size int) []byte {
   436  	for {
   437  		select {
   438  		case b := <-bufPool:
   439  			if cap(b) >= size {
   440  				return b[:size]
   441  			}
   442  		default:
   443  			return make([]byte, size)
   444  		}
   445  	}
   446  }
   447  
   448  func putBuf(b []byte) {
   449  	select {
   450  	case bufPool <- b:
   451  	default:
   452  	}
   453  }
   454  
   455  // NewHash returns a new hash.Hash of the currently recommended hash type.
   456  // Currently this is just SHA-1, but will likely change within the next
   457  // year or so.
   458  func NewHash() hash.Hash {
   459  	return sha1.New()
   460  }
   461  
   462  func ValidRefString(s string) bool {
   463  	// TODO: optimize to not allocate
   464  	return ParseOrZero(s).Valid()
   465  }
   466  
// null is the JSON encoding of null, as used for an invalid Ref.
var null = []byte(`null`)
   468  
   469  func (r *Ref) UnmarshalJSON(d []byte) error {
   470  	if r.digest != nil {
   471  		return errors.New("Can't UnmarshalJSON into a non-zero Ref")
   472  	}
   473  	if len(d) == 0 || bytes.Equal(d, null) {
   474  		return nil
   475  	}
   476  	if len(d) < 2 || d[0] != '"' || d[len(d)-1] != '"' {
   477  		return fmt.Errorf("blob: expecting a JSON string to unmarshal, got %q", d)
   478  	}
   479  	d = d[1 : len(d)-1]
   480  	p, ok := ParseBytes(d)
   481  	if !ok {
   482  		return fmt.Errorf("blobref: invalid blobref %q (%d)", d, len(d))
   483  	}
   484  	*r = p
   485  	return nil
   486  }
   487  
   488  func (r Ref) MarshalJSON() ([]byte, error) {
   489  	if !r.Valid() {
   490  		return null, nil
   491  	}
   492  	dname := r.digest.digestName()
   493  	bs := r.digest.bytes()
   494  	buf := make([]byte, 0, 3+len(dname)+len(bs)*2)
   495  	buf = append(buf, '"')
   496  	buf = r.appendString(buf)
   497  	buf = append(buf, '"')
   498  	return buf, nil
   499  }
   500  
   501  // MarshalBinary implements Go's encoding.BinaryMarshaler interface.
   502  func (r Ref) MarshalBinary() (data []byte, err error) {
   503  	dname := r.digest.digestName()
   504  	bs := r.digest.bytes()
   505  	data = make([]byte, 0, len(dname)+1+len(bs))
   506  	data = append(data, dname...)
   507  	data = append(data, '-')
   508  	data = append(data, bs...)
   509  	return
   510  }
   511  
   512  // UnmarshalBinary implements Go's encoding.BinaryUnmarshaler interface.
   513  func (r *Ref) UnmarshalBinary(data []byte) error {
   514  	if r.digest != nil {
   515  		return errors.New("Can't UnmarshalBinary into a non-zero Ref")
   516  	}
   517  	i := bytes.IndexByte(data, '-')
   518  	if i < 1 {
   519  		return errors.New("no digest name")
   520  	}
   521  
   522  	digName := string(data[:i])
   523  	buf := data[i+1:]
   524  
   525  	meta, ok := metaFromString[digName]
   526  	if !ok {
   527  		r2, ok := parseUnknown(digName, fmt.Sprintf("%x", buf))
   528  		if !ok {
   529  			return errors.New("invalid blobref binary data")
   530  		}
   531  		*r = r2
   532  		return nil
   533  	}
   534  	if len(buf) != meta.size {
   535  		return errors.New("wrong size of data for digest " + digName)
   536  	}
   537  	r.digest = meta.ctor(buf)
   538  	return nil
   539  }
   540  
   541  // Less reports whether r sorts before o. Invalid references blobs sort first.
   542  func (r Ref) Less(o Ref) bool {
   543  	if r.Valid() != o.Valid() {
   544  		return o.Valid()
   545  	}
   546  	if !r.Valid() {
   547  		return false
   548  	}
   549  	if n1, n2 := r.digest.digestName(), o.digest.digestName(); n1 != n2 {
   550  		return n1 < n2
   551  	}
   552  	return bytes.Compare(r.digest.bytes(), o.digest.bytes()) < 0
   553  }
   554  
   555  // ByRef sorts blob references.
   556  type ByRef []Ref
   557  
   558  func (s ByRef) Len() int           { return len(s) }
   559  func (s ByRef) Less(i, j int) bool { return s[i].Less(s[j]) }
   560  func (s ByRef) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
   561  
   562  // SizedByRef sorts SizedRefs by their blobref.
   563  type SizedByRef []SizedRef
   564  
   565  func (s SizedByRef) Len() int           { return len(s) }
   566  func (s SizedByRef) Less(i, j int) bool { return s[i].Less(s[j].Ref) }
   567  func (s SizedByRef) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
   568  
// Blob represents a blob. Use the methods Size, SizedRef and
// Open to query and get data from Blob.
type Blob struct {
	ref       Ref                  // reference identifying the blob
	size      uint32               // size of the blob in bytes
	newReader func() io.ReadCloser // called by Open to obtain a reader
}
   576  
   577  // NewBlob constructs a Blob from its Ref, size and a function that
   578  // returns an io.ReadCloser from which the blob can be read. Any error
   579  // in the function newReader when constructing the io.ReadCloser should
   580  // be returned upon the first call to Read or Close.
   581  func NewBlob(ref Ref, size uint32, newReader func() io.ReadCloser) Blob {
   582  	return Blob{ref, size, newReader}
   583  }
   584  
   585  // Size returns the size of the blob (in bytes).
   586  func (b Blob) Size() uint32 {
   587  	return b.size
   588  }
   589  
   590  // SizedRef returns the SizedRef corresponding to the blob.
   591  func (b Blob) SizedRef() SizedRef {
   592  	return SizedRef{b.ref, int64(b.size)}
   593  }
   594  
   595  // Open returns an io.ReadCloser that can be used to read the blob
   596  // data. The caller must close the io.ReadCloser when finished.
   597  func (b Blob) Open() io.ReadCloser {
   598  	return b.newReader()
   599  }