github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/pkg/blob/ref.go (about)

     1  /*
     2  Copyright 2013 Google Inc.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8       http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  // Package blob defines types to refer to and retrieve low-level Camlistore blobs.
    18  package blob
    19  
    20  import (
    21  	"bytes"
    22  	"crypto/sha1"
    23  	"errors"
    24  	"fmt"
    25  	"hash"
    26  	"reflect"
    27  	"regexp"
    28  	"strings"
    29  )
    30  
    31  // Pattern is the regular expression which matches a blobref.
    32  // It does not contain ^ or $.
    33  const Pattern = `\b([a-z][a-z0-9]*)-([a-f0-9]+)\b`
    34  
    35  // whole blobref pattern
    36  var blobRefPattern = regexp.MustCompile("^" + Pattern + "$")
    37  
    38  // Ref is a reference to a Camlistore blob.
    39  // It is used as a value type and supports equality (with ==) and the ability
    40  // to use it as a map key.
    41  type Ref struct {
    42  	digest digestType
    43  }
    44  
    45  // SizedRef is like a Ref but includes a size.
    46  // It should also be used as a value type and supports equality.
    47  type SizedRef struct {
    48  	Ref
    49  	Size uint32
    50  }
    51  
    52  func (sr SizedRef) String() string {
    53  	return fmt.Sprintf("[%s; %d bytes]", sr.Ref.String(), sr.Size)
    54  }
    55  
    56  // digestType is an interface type, but any type implementing it must
    57  // be of concrete type [N]byte, so it supports equality with ==,
    58  // which is a requirement for ref.
    59  type digestType interface {
    60  	bytes() []byte
    61  	digestName() string
    62  	newHash() hash.Hash
    63  }
    64  
    65  func (r Ref) String() string {
    66  	if r.digest == nil {
    67  		return "<invalid-blob.Ref>"
    68  	}
    69  	// TODO: maybe memoize this.
    70  	dname := r.digest.digestName()
    71  	bs := r.digest.bytes()
    72  	buf := getBuf(len(dname) + 1 + len(bs)*2)[:0]
    73  	defer putBuf(buf)
    74  	return string(r.appendString(buf))
    75  }
    76  
    77  func (r Ref) appendString(buf []byte) []byte {
    78  	dname := r.digest.digestName()
    79  	bs := r.digest.bytes()
    80  	buf = append(buf, dname...)
    81  	buf = append(buf, '-')
    82  	for _, b := range bs {
    83  		buf = append(buf, hexDigit[b>>4], hexDigit[b&0xf])
    84  	}
    85  	if o, ok := r.digest.(otherDigest); ok && o.odd {
    86  		buf = buf[:len(buf)-1]
    87  	}
    88  	return buf
    89  }
    90  
    91  // HashName returns the lowercase hash function name of the reference.
    92  // It panics if r is zero.
    93  func (r Ref) HashName() string {
    94  	if r.digest == nil {
    95  		panic("HashName called on invalid Ref")
    96  	}
    97  	return r.digest.digestName()
    98  }
    99  
   100  // Digest returns the lower hex digest of the blobref, without
   101  // the e.g. "sha1-" prefix. It panics if r is zero.
   102  func (r Ref) Digest() string {
   103  	if r.digest == nil {
   104  		panic("Digest called on invalid Ref")
   105  	}
   106  	bs := r.digest.bytes()
   107  	buf := getBuf(len(bs) * 2)[:0]
   108  	defer putBuf(buf)
   109  	for _, b := range bs {
   110  		buf = append(buf, hexDigit[b>>4], hexDigit[b&0xf])
   111  	}
   112  	if o, ok := r.digest.(otherDigest); ok && o.odd {
   113  		buf = buf[:len(buf)-1]
   114  	}
   115  	return string(buf)
   116  }
   117  
   118  func (r Ref) DigestPrefix(digits int) string {
   119  	v := r.Digest()
   120  	if len(v) < digits {
   121  		return v
   122  	}
   123  	return v[:digits]
   124  }
   125  
   126  func (r Ref) DomID() string {
   127  	if !r.Valid() {
   128  		return ""
   129  	}
   130  	return "camli-" + r.String()
   131  }
   132  
   133  func (r Ref) Sum32() uint32 {
   134  	var v uint32
   135  	for _, b := range r.digest.bytes()[:4] {
   136  		v = v<<8 | uint32(b)
   137  	}
   138  	return v
   139  }
   140  
   141  func (r Ref) Sum64() uint64 {
   142  	var v uint64
   143  	for _, b := range r.digest.bytes()[:8] {
   144  		v = v<<8 | uint64(b)
   145  	}
   146  	return v
   147  }
   148  
   149  // Hash returns a new hash.Hash of r's type.
   150  // It panics if r is zero.
   151  func (r Ref) Hash() hash.Hash {
   152  	return r.digest.newHash()
   153  }
   154  
   155  func (r Ref) HashMatches(h hash.Hash) bool {
   156  	if r.digest == nil {
   157  		return false
   158  	}
   159  	return bytes.Equal(h.Sum(nil), r.digest.bytes())
   160  }
   161  
   162  const hexDigit = "0123456789abcdef"
   163  
   164  func (r Ref) Valid() bool { return r.digest != nil }
   165  
   166  func (r Ref) IsSupported() bool {
   167  	if !r.Valid() {
   168  		return false
   169  	}
   170  	_, ok := metaFromString[r.digest.digestName()]
   171  	return ok
   172  }
   173  
   174  // ParseKnown is like Parse, but only parse blobrefs known to this
   175  // server. It returns ok == false for well-formed but unsupported
   176  // blobrefs.
   177  func ParseKnown(s string) (ref Ref, ok bool) {
   178  	return parse(s, false)
   179  }
   180  
   181  // Parse parse s as a blobref and returns the ref and whether it was
   182  // parsed successfully.
   183  func Parse(s string) (ref Ref, ok bool) {
   184  	return parse(s, true)
   185  }
   186  
   187  func parse(s string, allowAll bool) (ref Ref, ok bool) {
   188  	i := strings.Index(s, "-")
   189  	if i < 0 {
   190  		return
   191  	}
   192  	name := s[:i] // e.g. "sha1"
   193  	hex := s[i+1:]
   194  	meta, ok := metaFromString[name]
   195  	if !ok {
   196  		if allowAll || testRefType[name] {
   197  			return parseUnknown(name, hex)
   198  		}
   199  		return
   200  	}
   201  	if len(hex) != meta.size*2 {
   202  		ok = false
   203  		return
   204  	}
   205  	dt, ok := meta.ctors(hex)
   206  	if !ok {
   207  		return
   208  	}
   209  	return Ref{dt}, true
   210  }
   211  
   212  var testRefType = map[string]bool{
   213  	"fakeref": true,
   214  	"testref": true,
   215  	"perma":   true,
   216  }
   217  
   218  // ParseBytes is like Parse, but parses from a byte slice.
   219  func ParseBytes(s []byte) (ref Ref, ok bool) {
   220  	i := bytes.IndexByte(s, '-')
   221  	if i < 0 {
   222  		return
   223  	}
   224  	name := s[:i] // e.g. "sha1"
   225  	hex := s[i+1:]
   226  	meta, ok := metaFromBytes(name)
   227  	if !ok {
   228  		return parseUnknown(string(name), string(hex))
   229  	}
   230  	if len(hex) != meta.size*2 {
   231  		ok = false
   232  		return
   233  	}
   234  	dt, ok := meta.ctorb(hex)
   235  	if !ok {
   236  		return
   237  	}
   238  	return Ref{dt}, true
   239  }
   240  
   241  // Parse parse s as a blobref. If s is invalid, a zero Ref is returned
   242  // which can be tested with the Valid method.
   243  func ParseOrZero(s string) Ref {
   244  	ref, ok := Parse(s)
   245  	if !ok {
   246  		return Ref{}
   247  	}
   248  	return ref
   249  }
   250  
   251  // MustParse parse s as a blobref and panics on failure.
   252  func MustParse(s string) Ref {
   253  	ref, ok := Parse(s)
   254  	if !ok {
   255  		panic("Invalid blobref " + s)
   256  	}
   257  	return ref
   258  }
   259  
   260  // '0' => 0 ... 'f' => 15, else sets *bad to true.
   261  func hexVal(b byte, bad *bool) byte {
   262  	if '0' <= b && b <= '9' {
   263  		return b - '0'
   264  	}
   265  	if 'a' <= b && b <= 'f' {
   266  		return b - 'a' + 10
   267  	}
   268  	*bad = true
   269  	return 0
   270  }
   271  
   272  func validDigestName(name string) bool {
   273  	if name == "" {
   274  		return false
   275  	}
   276  	for _, r := range name {
   277  		if 'a' <= r && r <= 'z' {
   278  			continue
   279  		}
   280  		if '0' <= r && r <= '9' {
   281  			continue
   282  		}
   283  		return false
   284  	}
   285  	return true
   286  }
   287  
   288  // parseUnknown parses a blobref where the digest type isn't known to this server.
   289  // e.g. ("foo-ababab")
   290  func parseUnknown(digest, hex string) (ref Ref, ok bool) {
   291  	if !validDigestName(digest) {
   292  		return
   293  	}
   294  
   295  	// TODO: remove this short hack and don't allow odd numbers of hex digits.
   296  	odd := false
   297  	if len(hex)%2 != 0 {
   298  		hex += "0"
   299  		odd = true
   300  	}
   301  
   302  	if len(hex) < 2 || len(hex)%2 != 0 || len(hex) > maxOtherDigestLen*2 {
   303  		return
   304  	}
   305  	o := otherDigest{
   306  		name:   digest,
   307  		sumLen: len(hex) / 2,
   308  		odd:    odd,
   309  	}
   310  	bad := false
   311  	for i := 0; i < len(hex); i += 2 {
   312  		o.sum[i/2] = hexVal(hex[i], &bad)<<4 | hexVal(hex[i+1], &bad)
   313  	}
   314  	if bad {
   315  		return
   316  	}
   317  	return Ref{o}, true
   318  }
   319  
   320  func sha1FromBinary(b []byte) digestType {
   321  	var d sha1Digest
   322  	if len(d) != len(b) {
   323  		panic("bogus sha-1 length")
   324  	}
   325  	copy(d[:], b)
   326  	return d
   327  }
   328  
   329  func sha1FromHexString(hex string) (digestType, bool) {
   330  	var d sha1Digest
   331  	var bad bool
   332  	for i := 0; i < len(hex); i += 2 {
   333  		d[i/2] = hexVal(hex[i], &bad)<<4 | hexVal(hex[i+1], &bad)
   334  	}
   335  	if bad {
   336  		return nil, false
   337  	}
   338  	return d, true
   339  }
   340  
   341  // yawn. exact copy of sha1FromHexString.
   342  func sha1FromHexBytes(hex []byte) (digestType, bool) {
   343  	var d sha1Digest
   344  	var bad bool
   345  	for i := 0; i < len(hex); i += 2 {
   346  		d[i/2] = hexVal(hex[i], &bad)<<4 | hexVal(hex[i+1], &bad)
   347  	}
   348  	if bad {
   349  		return nil, false
   350  	}
   351  	return d, true
   352  }
   353  
   354  // RefFromHash returns a blobref representing the given hash.
   355  // It panics if the hash isn't of a known type.
   356  func RefFromHash(h hash.Hash) Ref {
   357  	meta, ok := metaFromType[reflect.TypeOf(h)]
   358  	if !ok {
   359  		panic(fmt.Sprintf("Currently-unsupported hash type %T", h))
   360  	}
   361  	return Ref{meta.ctor(h.Sum(nil))}
   362  }
   363  
   364  // RefFromString returns a blobref from the given string, for the currently
   365  // recommended hash function
   366  func RefFromString(s string) Ref {
   367  	return SHA1FromString(s)
   368  }
   369  
   370  // SHA1FromString returns a SHA-1 blobref of the provided string.
   371  func SHA1FromString(s string) Ref {
   372  	s1 := sha1.New()
   373  	s1.Write([]byte(s))
   374  	return RefFromHash(s1)
   375  }
   376  
   377  // SHA1FromBytes returns a SHA-1 blobref of the provided bytes.
   378  func SHA1FromBytes(b []byte) Ref {
   379  	s1 := sha1.New()
   380  	s1.Write(b)
   381  	return RefFromHash(s1)
   382  }
   383  
   384  type sha1Digest [20]byte
   385  
   386  func (s sha1Digest) digestName() string { return "sha1" }
   387  func (s sha1Digest) bytes() []byte      { return s[:] }
   388  func (s sha1Digest) newHash() hash.Hash { return sha1.New() }
   389  
   390  const maxOtherDigestLen = 128
   391  
   392  type otherDigest struct {
   393  	name   string
   394  	sum    [maxOtherDigestLen]byte
   395  	sumLen int  // bytes in sum that are valid
   396  	odd    bool // odd number of hex digits in input
   397  }
   398  
   399  func (d otherDigest) digestName() string { return d.name }
   400  func (d otherDigest) bytes() []byte      { return d.sum[:d.sumLen] }
   401  func (d otherDigest) newHash() hash.Hash { return nil }
   402  
   403  var sha1Meta = &digestMeta{
   404  	ctor:  sha1FromBinary,
   405  	ctors: sha1FromHexString,
   406  	ctorb: sha1FromHexBytes,
   407  	size:  sha1.Size,
   408  }
   409  
   410  var metaFromString = map[string]*digestMeta{
   411  	"sha1": sha1Meta,
   412  }
   413  
   414  type blobTypeAndMeta struct {
   415  	name []byte
   416  	meta *digestMeta
   417  }
   418  
   419  var metas []blobTypeAndMeta
   420  
   421  func metaFromBytes(name []byte) (meta *digestMeta, ok bool) {
   422  	for _, bm := range metas {
   423  		if bytes.Equal(name, bm.name) {
   424  			return bm.meta, true
   425  		}
   426  	}
   427  	return
   428  }
   429  
   430  func init() {
   431  	for name, meta := range metaFromString {
   432  		metas = append(metas, blobTypeAndMeta{
   433  			name: []byte(name),
   434  			meta: meta,
   435  		})
   436  	}
   437  }
   438  
   439  var sha1Type = reflect.TypeOf(sha1.New())
   440  
   441  var metaFromType = map[reflect.Type]*digestMeta{
   442  	sha1Type: sha1Meta,
   443  }
   444  
   445  type digestMeta struct {
   446  	ctor  func(binary []byte) digestType
   447  	ctors func(hex string) (digestType, bool)
   448  	ctorb func(hex []byte) (digestType, bool)
   449  	size  int // bytes of digest
   450  }
   451  
   452  var bufPool = make(chan []byte, 20)
   453  
   454  func getBuf(size int) []byte {
   455  	for {
   456  		select {
   457  		case b := <-bufPool:
   458  			if cap(b) >= size {
   459  				return b[:size]
   460  			}
   461  		default:
   462  			return make([]byte, size)
   463  		}
   464  	}
   465  }
   466  
   467  func putBuf(b []byte) {
   468  	select {
   469  	case bufPool <- b:
   470  	default:
   471  	}
   472  }
   473  
   474  // NewHash returns a new hash.Hash of the currently recommended hash type.
   475  // Currently this is just SHA-1, but will likely change within the next
   476  // year or so.
   477  func NewHash() hash.Hash {
   478  	return sha1.New()
   479  }
   480  
   481  func ValidRefString(s string) bool {
   482  	// TODO: optimize to not allocate
   483  	return ParseOrZero(s).Valid()
   484  }
   485  
   486  var null = []byte(`null`)
   487  
   488  func (r *Ref) UnmarshalJSON(d []byte) error {
   489  	if r.digest != nil {
   490  		return errors.New("Can't UnmarshalJSON into a non-zero Ref")
   491  	}
   492  	if len(d) == 0 || bytes.Equal(d, null) {
   493  		return nil
   494  	}
   495  	if len(d) < 2 || d[0] != '"' || d[len(d)-1] != '"' {
   496  		return fmt.Errorf("blob: expecting a JSON string to unmarshal, got %q", d)
   497  	}
   498  	d = d[1 : len(d)-1]
   499  	p, ok := ParseBytes(d)
   500  	if !ok {
   501  		return fmt.Errorf("blobref: invalid blobref %q (%d)", d, len(d))
   502  	}
   503  	*r = p
   504  	return nil
   505  }
   506  
   507  func (r Ref) MarshalJSON() ([]byte, error) {
   508  	if !r.Valid() {
   509  		return null, nil
   510  	}
   511  	dname := r.digest.digestName()
   512  	bs := r.digest.bytes()
   513  	buf := make([]byte, 0, 3+len(dname)+len(bs)*2)
   514  	buf = append(buf, '"')
   515  	buf = r.appendString(buf)
   516  	buf = append(buf, '"')
   517  	return buf, nil
   518  }
   519  
   520  // MarshalBinary implements Go's encoding.BinaryMarshaler interface.
   521  func (r Ref) MarshalBinary() (data []byte, err error) {
   522  	dname := r.digest.digestName()
   523  	bs := r.digest.bytes()
   524  	data = make([]byte, 0, len(dname)+1+len(bs))
   525  	data = append(data, dname...)
   526  	data = append(data, '-')
   527  	data = append(data, bs...)
   528  	return
   529  }
   530  
   531  // UnmarshalBinary implements Go's encoding.BinaryUnmarshaler interface.
   532  func (r *Ref) UnmarshalBinary(data []byte) error {
   533  	if r.digest != nil {
   534  		return errors.New("Can't UnmarshalBinary into a non-zero Ref")
   535  	}
   536  	i := bytes.IndexByte(data, '-')
   537  	if i < 1 {
   538  		return errors.New("no digest name")
   539  	}
   540  
   541  	digName := string(data[:i])
   542  	buf := data[i+1:]
   543  
   544  	meta, ok := metaFromString[digName]
   545  	if !ok {
   546  		r2, ok := parseUnknown(digName, fmt.Sprintf("%x", buf))
   547  		if !ok {
   548  			return errors.New("invalid blobref binary data")
   549  		}
   550  		*r = r2
   551  		return nil
   552  	}
   553  	if len(buf) != meta.size {
   554  		return errors.New("wrong size of data for digest " + digName)
   555  	}
   556  	r.digest = meta.ctor(buf)
   557  	return nil
   558  }
   559  
   560  // Less reports whether r sorts before o. Invalid references blobs sort first.
   561  func (r Ref) Less(o Ref) bool {
   562  	if r.Valid() != o.Valid() {
   563  		return o.Valid()
   564  	}
   565  	if !r.Valid() {
   566  		return false
   567  	}
   568  	if n1, n2 := r.digest.digestName(), o.digest.digestName(); n1 != n2 {
   569  		return n1 < n2
   570  	}
   571  	return bytes.Compare(r.digest.bytes(), o.digest.bytes()) < 0
   572  }
   573  
   574  // ByRef sorts blob references.
   575  type ByRef []Ref
   576  
   577  func (s ByRef) Len() int           { return len(s) }
   578  func (s ByRef) Less(i, j int) bool { return s[i].Less(s[j]) }
   579  func (s ByRef) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
   580  
   581  // SizedByRef sorts SizedRefs by their blobref.
   582  type SizedByRef []SizedRef
   583  
   584  func (s SizedByRef) Len() int           { return len(s) }
   585  func (s SizedByRef) Less(i, j int) bool { return s[i].Less(s[j].Ref) }
   586  func (s SizedByRef) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
   587  
   588  // TypeAlphabet returns the valid characters in the given blobref type.
   589  // It returns the empty string if the typ is unknown.
   590  func TypeAlphabet(typ string) string {
   591  	switch typ {
   592  	case "sha1":
   593  		return hexDigit
   594  	}
   595  	return ""
   596  }