github.com/olivere/camlistore@v0.0.0-20140121221811-1b7ac2da0199/pkg/blobserver/encrypt/encrypt.go (about)

     1  /*
     2  Copyright 2013 Google Inc.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8       http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  // Package encrypt registers the "encrypt" blobserver storage type
    18  // which stores all blobs and metadata with AES encryption into other
    19  // wrapped storage targets (e.g. localdisk, s3, remote, google).
    20  //
    21  // An encrypt storage target is configured with two other storage targets:
    22  // one to hold encrypted blobs, and one to hold encrypted metadata about
    23  // the encrypted blobs. On start-up, all the metadata blobs are read
    24  // to discover the plaintext blobrefs.
    25  //
    26  // Encryption is currently always AES-128.  See code for metadata formats
    27  // and configuration details, which are currently subject to change.
    28  //
    29  // WARNING: work in progress as of 2013-07-13.
    30  package encrypt
    31  
    32  import (
    33  	"bufio"
    34  	"bytes"
    35  	"container/heap"
    36  	"crypto/aes"
    37  	"crypto/cipher"
    38  	"crypto/rand"
    39  	"crypto/sha1"
    40  	"encoding/hex"
    41  	"errors"
    42  	"fmt"
    43  	"io"
    44  	"io/ioutil"
    45  	"log"
    46  	"os"
    47  	"strconv"
    48  	"strings"
    49  	"sync"
    50  
    51  	"camlistore.org/pkg/blob"
    52  	"camlistore.org/pkg/blobserver"
    53  	"camlistore.org/pkg/context"
    54  	"camlistore.org/pkg/jsonconfig"
    55  	"camlistore.org/pkg/sorted"
    56  )
    57  
// Compaction constants
const (
	// FullMetaBlobSize is the size at which we stop compacting
	// a meta blob. Its value is 512<<10 (524288); presumably the
	// unit is bytes of meta blob data -- TODO confirm once
	// compaction is implemented.
	FullMetaBlobSize = 512 << 10
)
    64  
    65  /*
    66  Dev notes:
    67  
    68  $ devcam put --path=/enc/ blob dev-camput
    69  sha1-282c0feceeb5cdf4c5086c191b15356fadfb2392
    70  $ devcam get --path=/enc/ sha1-282c0feceeb5cdf4c5086c191b15356fadfb2392
    71  $ find /tmp/camliroot-$USER/port3179/encblob/
    72  $ ./dev-camtool sync --src=http://localhost:3179/enc/ --dest=stdout
    73  
    74  */
    75  
    76  // TODO:
    77  // http://godoc.org/code.google.com/p/go.crypto/scrypt
    78  
// storage is the state behind the "encrypt" storage type: an index
// keyed by plaintext blobref, the AES key and block cipher, and the
// two wrapped storage targets that hold the encrypted blobs and the
// encrypted metadata.
type storage struct {
	// index is the meta index.
	// it's keyed by plaintext blobref.
	// the value is the meta key (encodeMetaValue)
	index sorted.KeyValue

	// Encryption key.
	key   []byte
	block cipher.Block // aes.NewCipher(key)

	// blobs holds encrypted versions of all plaintext blobs.
	blobs blobserver.Storage

	// meta holds metadata mapping between the names of plaintext
	// blobs and their after-encryption name, as well as their
	// IV. Each blob in meta contains 1 or more blob
	// description. All new insertions generate both a new
	// encrypted blob in 'blobs' and one single-meta blob in
	// 'meta'. The small metadata blobs are occasionally rolled up
	// into bigger blobs with multiple blob descriptions.
	meta blobserver.Storage

	// TODO(bradfitz): finish metadata compaction
	/*
		// mu guards the following
		mu sync.Mutex
		// toDelete are the meta blobrefs that are no longer
		// necessary, as they're subsets of others.
		toDelete []blob.Ref
		// plainIn maps from a plaintext blobref to its currently-largest-describing metablob.
		plainIn map[string]*metaBlobInfo
		// smallMeta tracks a heap of meta blobs, sorted by their encrypted size
		smallMeta metaBlobHeap
	*/

	// Hooks for testing. When testRandIV is non-nil, randIV returns
	// its result instead of reading from crypto/rand.
	testRandIV func() []byte
}
   117  
   118  func (s *storage) setKey(key []byte) error {
   119  	var err error
   120  	s.block, err = aes.NewCipher(key)
   121  	if err != nil {
   122  		return fmt.Errorf("The key must be exactly 16 bytes (currently only AES-128 is supported): %v", err)
   123  	}
   124  	s.key = key
   125  	return nil
   126  }
   127  
// metaBlobInfo records what is known about one meta blob: its
// blobref, its size, and the plaintext blobrefs it describes.
type metaBlobInfo struct {
	br     blob.Ref   // of meta blob
	n      int        // size of meta blob
	plains []blob.Ref // plaintext blobrefs listed in the meta blob's lines
}
   133  
   134  type metaBlobHeap []*metaBlobInfo
   135  
   136  var _ heap.Interface = (*metaBlobHeap)(nil)
   137  
   138  func (s *metaBlobHeap) Push(x interface{}) {
   139  	*s = append(*s, x.(*metaBlobInfo))
   140  }
   141  
   142  func (s *metaBlobHeap) Pop() interface{} {
   143  	l := s.Len()
   144  	v := (*s)[l]
   145  	*s = (*s)[:l-1]
   146  	return v
   147  }
   148  
   149  func (s *metaBlobHeap) Len() int { return len(*s) }
   150  func (s *metaBlobHeap) Less(i, j int) bool {
   151  	sl := *s
   152  	v := sl[i].n < sl[j].n
   153  	if !v && sl[i].n == sl[j].n {
   154  		v = sl[i].br.String() < sl[j].br.String()
   155  	}
   156  	return v
   157  }
   158  
   159  func (s *metaBlobHeap) Swap(i, j int) { (*s)[i], (*s)[j] = (*s)[j], (*s)[i] }
   160  
   161  func (s *storage) randIV() []byte {
   162  	if f := s.testRandIV; f != nil {
   163  		return f()
   164  	}
   165  	iv := make([]byte, s.block.BlockSize())
   166  	n, err := rand.Read(iv)
   167  	if err != nil {
   168  		panic(err)
   169  	}
   170  	if n != len(iv) {
   171  		panic("short read from crypto/rand")
   172  	}
   173  	return iv
   174  }
   175  
   176  /*
   177  Meta format:
   178     <16 bytes of IV> (for AES-128)
   179     <20 bytes of SHA-1 of plaintext>
   180     <encrypted>
   181  
   182  Where encrypted has plaintext of:
   183     #camlistore/encmeta=1
   184  Then sorted lines, each ending in a newline, like:
   185     sha1-plain/<metaValue>
   186  See the encodeMetaValue for the definition of metaValue, but in summary:
   187     sha1-plain/<plaintext size>/<iv as %x>/sha1-encrypted/<encrypted size>
   188  */
   189  
   190  func (s *storage) makeSingleMetaBlob(plainBR blob.Ref, meta string) []byte {
   191  	iv := s.randIV()
   192  
   193  	var plain bytes.Buffer
   194  	plain.WriteString("#camlistore/encmeta=1\n")
   195  	plain.WriteString(plainBR.String())
   196  	plain.WriteByte('/')
   197  	plain.WriteString(meta)
   198  	plain.WriteByte('\n')
   199  
   200  	s1 := sha1.New()
   201  	s1.Write(plain.Bytes())
   202  
   203  	var final bytes.Buffer
   204  	final.Grow(len(iv) + sha1.Size + plain.Len())
   205  	final.Write(iv)
   206  	final.Write(s1.Sum(final.Bytes()[len(iv):]))
   207  
   208  	_, err := io.Copy(cipher.StreamWriter{S: cipher.NewCTR(s.block, iv), W: &final}, &plain)
   209  	if err != nil {
   210  		panic(err)
   211  	}
   212  	return final.Bytes()
   213  }
   214  
// RemoveBlobs is not implemented yet: every call panics.
func (s *storage) RemoveBlobs(blobs []blob.Ref) error {
	panic("TODO: implement")
}
   218  
   219  func (s *storage) StatBlobs(dest chan<- blob.SizedRef, blobs []blob.Ref) error {
   220  	for _, br := range blobs {
   221  		v, err := s.index.Get(br.String())
   222  		if err == sorted.ErrNotFound {
   223  			continue
   224  		}
   225  		if err != nil {
   226  			return err
   227  		}
   228  		plainSize, ok := parseMetaValuePlainSize(v)
   229  		if !ok {
   230  			continue
   231  		}
   232  		if err != nil {
   233  			continue
   234  		}
   235  		dest <- blob.SizedRef{br, plainSize}
   236  	}
   237  	return nil
   238  }
   239  
   240  func (s *storage) ReceiveBlob(plainBR blob.Ref, source io.Reader) (sb blob.SizedRef, err error) {
   241  	iv := s.randIV()
   242  	stream := cipher.NewCTR(s.block, iv)
   243  
   244  	hash := plainBR.Hash()
   245  	var buf bytes.Buffer
   246  	// TODO: compress before encrypting?
   247  	buf.Write(iv) // TODO: write more structured header w/ version & IV length? or does that weaken it?
   248  	sw := cipher.StreamWriter{S: stream, W: &buf}
   249  	plainSize, err := io.Copy(io.MultiWriter(sw, hash), source)
   250  	if err != nil {
   251  		return sb, err
   252  	}
   253  	if !plainBR.HashMatches(hash) {
   254  		return sb, blobserver.ErrCorruptBlob
   255  	}
   256  
   257  	encBR := blob.SHA1FromBytes(buf.Bytes())
   258  	_, err = blobserver.Receive(s.blobs, encBR, bytes.NewReader(buf.Bytes()))
   259  	if err != nil {
   260  		log.Printf("encrypt: error writing encrypted blob %v (plaintext %v): %v", encBR, plainBR, err)
   261  		return sb, errors.New("encrypt: error writing encrypted blob")
   262  	}
   263  
   264  	meta := encodeMetaValue(plainSize, iv, encBR, buf.Len())
   265  	metaBlob := s.makeSingleMetaBlob(plainBR, meta)
   266  	_, err = blobserver.ReceiveNoHash(s.meta, blob.SHA1FromBytes(metaBlob), bytes.NewReader(metaBlob))
   267  	if err != nil {
   268  		log.Printf("encrypt: error writing encrypted meta for plaintext %v (encrypted blob %v): %v", plainBR, encBR, err)
   269  		return sb, errors.New("encrypt: error writing encrypted meta")
   270  	}
   271  
   272  	err = s.index.Set(plainBR.String(), meta)
   273  	if err != nil {
   274  		return sb, fmt.Errorf("encrypt: error updating index for encrypted %v (plaintext %v): %v", err)
   275  	}
   276  
   277  	return blob.SizedRef{plainBR, plainSize}, nil
   278  }
   279  
   280  func (s *storage) FetchStreaming(plainBR blob.Ref) (file io.ReadCloser, size int64, err error) {
   281  	meta, err := s.fetchMeta(plainBR)
   282  	if err != nil {
   283  		return nil, 0, err
   284  	}
   285  	encData, _, err := s.blobs.FetchStreaming(meta.EncBlobRef)
   286  	if err != nil {
   287  		log.Printf("encrypt: plaintext %s's encrypted %v blob not found", plainBR, meta.EncBlobRef)
   288  		return
   289  	}
   290  	defer encData.Close()
   291  
   292  	// Quick sanity check that the blob begins with the same IV we
   293  	// have in our metadata.
   294  	blobIV := make([]byte, len(meta.IV))
   295  	_, err = io.ReadFull(encData, blobIV)
   296  	if err != nil {
   297  		return nil, 0, fmt.Errorf("Error reading off IV header from blob: %v", err)
   298  	}
   299  	if !bytes.Equal(blobIV, meta.IV) {
   300  		return nil, 0, fmt.Errorf("Blob and meta IV don't match")
   301  	}
   302  
   303  	// Slurp the whole blob into memory to validate its plaintext
   304  	// checksum (no tampered bits) before returning it. Clients
   305  	// should be the party doing this in the general case, but
   306  	// we'll be extra paranoid and always do it here, at the cost
   307  	// of sometimes having it be done twice.
   308  	var plain bytes.Buffer
   309  	plainHash := plainBR.Hash()
   310  	plainSize, err := io.Copy(io.MultiWriter(&plain, plainHash), cipher.StreamReader{
   311  		S: cipher.NewCTR(s.block, meta.IV),
   312  		R: encData,
   313  	})
   314  	if err != nil {
   315  		return nil, 0, err
   316  	}
   317  	if !plainBR.HashMatches(plainHash) {
   318  		return nil, 0, blobserver.ErrCorruptBlob
   319  	}
   320  	return struct {
   321  		*bytes.Reader
   322  		io.Closer
   323  	}{
   324  		bytes.NewReader(plain.Bytes()),
   325  		dummyCloser,
   326  	}, plainSize, nil
   327  }
   328  
   329  func (s *storage) EnumerateBlobs(ctx *context.Context, dest chan<- blob.SizedRef, after string, limit int) error {
   330  	defer close(dest)
   331  	iter := s.index.Find(after, "")
   332  	n := 0
   333  	for iter.Next() {
   334  		if iter.Key() == after {
   335  			continue
   336  		}
   337  		br, ok := blob.Parse(iter.Key())
   338  		if !ok {
   339  			panic("Bogus encrypt index key: " + iter.Key())
   340  		}
   341  		plainSize, ok := parseMetaValuePlainSize(iter.Value())
   342  		if !ok {
   343  			panic("Bogus encrypt index value: " + iter.Value())
   344  		}
   345  		select {
   346  		case dest <- blob.SizedRef{br, plainSize}:
   347  		case <-ctx.Done():
   348  			return context.ErrCanceled
   349  		}
   350  		n++
   351  		if limit != 0 && n >= limit {
   352  			break
   353  		}
   354  	}
   355  	return iter.Close()
   356  }
   357  
   358  // processEncryptedMetaBlob decrypts dat (the data for the br meta blob) and parses
   359  // its meta lines, updating the index.
   360  //
   361  // processEncryptedMetaBlob is not thread-safe.
   362  func (s *storage) processEncryptedMetaBlob(br blob.Ref, dat []byte) error {
   363  	mi := &metaBlobInfo{
   364  		br: br,
   365  		n:  len(dat),
   366  	}
   367  	log.Printf("processing meta blob %v: %d bytes", br, len(dat))
   368  	ivSize := s.block.BlockSize()
   369  	if len(dat) < ivSize+sha1.Size {
   370  		return errors.New("data size is smaller than IV + SHA-1")
   371  	}
   372  	var (
   373  		iv       = dat[:ivSize]
   374  		wantHash = dat[ivSize : ivSize+sha1.Size]
   375  		enc      = dat[ivSize+sha1.Size:]
   376  	)
   377  	plain := bytes.NewBuffer(make([]byte, 0, len(dat)))
   378  	io.Copy(plain, cipher.StreamReader{
   379  		S: cipher.NewCTR(s.block, iv),
   380  		R: bytes.NewReader(enc),
   381  	})
   382  	s1 := sha1.New()
   383  	s1.Write(plain.Bytes())
   384  	if !bytes.Equal(wantHash, s1.Sum(nil)) {
   385  		return errors.New("hash of encrypted data doesn't match")
   386  	}
   387  	sc := bufio.NewScanner(plain)
   388  	if !sc.Scan() {
   389  		return errors.New("No first line")
   390  	}
   391  	if sc.Text() != "#camlistore/encmeta=1" {
   392  		line := sc.Text()
   393  		if len(line) > 80 {
   394  			line = line[:80]
   395  		}
   396  		return fmt.Errorf("unsupported first line %q", line)
   397  	}
   398  	for sc.Scan() {
   399  		line := sc.Text()
   400  		slash := strings.Index(line, "/")
   401  		if slash < 0 {
   402  			return errors.New("no slash in metaline")
   403  		}
   404  		plainBR, meta := line[:slash], line[slash+1:]
   405  		log.Printf("Adding meta: %q = %q", plainBR, meta)
   406  		mi.plains = append(mi.plains, blob.ParseOrZero(plainBR))
   407  		if err := s.index.Set(plainBR, meta); err != nil {
   408  			return err
   409  		}
   410  	}
   411  	return sc.Err()
   412  }
   413  
// readAllMetaBlobs enumerates every blob in the meta storage, fetches
// them concurrently, and feeds each one to processEncryptedMetaBlob on
// the calling goroutine. It stops at the first fetch or processing
// error and returns it wrapped with the meta blobref; otherwise it
// returns the enumeration's error, if any.
func (s *storage) readAllMetaBlobs() error {
	type metaBlob struct {
		br  blob.Ref
		dat []byte // encrypted blob
		err error
	}
	// metac carries fetched meta blobs (or fetch errors) to the
	// processing loop below.
	metac := make(chan metaBlob, 16)

	// gate bounds the number of fetch goroutines running at once.
	const maxInFlight = 50
	var gate = make(chan bool, maxInFlight)

	var stopEnumerate = make(chan bool) // closed on error
	// enumErrc is buffered so the enumerating goroutine can finish
	// even when nobody reads the result (the early-return path).
	enumErrc := make(chan error, 1)
	go func() {
		var wg sync.WaitGroup
		enumErrc <- blobserver.EnumerateAll(context.TODO(), s.meta, func(sb blob.SizedRef) error {
			// Non-blocking check: abort enumeration once the
			// processing loop has reported an error.
			select {
			case <-stopEnumerate:
				return errors.New("enumeration stopped")
			default:
			}

			wg.Add(1)
			gate <- true
			go func() {
				defer wg.Done()
				defer func() { <-gate }()
				rc, _, err := s.meta.FetchStreaming(sb.Ref)
				var all []byte
				if err == nil {
					all, err = ioutil.ReadAll(rc)
					rc.Close()
				}
				metac <- metaBlob{sb.Ref, all, err}
			}()
			return nil
		})
		// Close metac only after every fetcher has sent its result.
		wg.Wait()
		close(metac)
	}()

	for mi := range metac {
		err := mi.err
		if err == nil {
			err = s.processEncryptedMetaBlob(mi.br, mi.dat)
		}
		if err != nil {
			close(stopEnumerate)
			// Keep draining metac so in-flight fetchers can
			// complete their sends and exit.
			go func() {
				for _ = range metac {
				}
			}()
			// TODO: advertise in this error message a new option or environment variable
			// to skip a certain or all meta blobs, to allow partial recovery, if some
			// are corrupt. For now, require all to be correct.
			return fmt.Errorf("Error with meta blob %v: %v", mi.br, err)
		}
	}

	return <-enumErrc
}
   475  
   476  func encodeMetaValue(plainSize int64, iv []byte, encBR blob.Ref, encSize int) string {
   477  	return fmt.Sprintf("%d/%x/%s/%d", plainSize, iv, encBR, encSize)
   478  }
   479  
// metaValue is the parsed form of an index value (see parseMetaValue).
type metaValue struct {
	IV         []byte   // hex-decoded from the second field
	EncBlobRef blob.Ref // blobref parsed from the third field
	EncSize    int64    // fourth field, the encrypted size
	PlainSize  int64    // first field, the plaintext size
}
   486  
   487  // returns os.ErrNotExist on cache miss
   488  func (s *storage) fetchMeta(b blob.Ref) (*metaValue, error) {
   489  	v, err := s.index.Get(b.String())
   490  	if err == sorted.ErrNotFound {
   491  		err = os.ErrNotExist
   492  	}
   493  	if err != nil {
   494  		return nil, err
   495  	}
   496  	return parseMetaValue(v)
   497  }
   498  
   499  func parseMetaValuePlainSize(v string) (plainSize int64, ok bool) {
   500  	slash := strings.Index(v, "/")
   501  	if slash < 0 {
   502  		return
   503  	}
   504  	n, err := strconv.Atoi(v[:slash])
   505  	if err != nil {
   506  		return
   507  	}
   508  	return int64(n), true
   509  }
   510  
   511  func parseMetaValue(v string) (mv *metaValue, err error) {
   512  	f := strings.Split(v, "/")
   513  	if len(f) != 4 {
   514  		return nil, errors.New("wrong number of fields")
   515  	}
   516  	mv = &metaValue{}
   517  	mv.PlainSize, err = strconv.ParseInt(f[0], 10, 64)
   518  	if err != nil {
   519  		return nil, fmt.Errorf("bad plaintext size in meta %q", v)
   520  	}
   521  	mv.IV, err = hex.DecodeString(f[1])
   522  	if err != nil {
   523  		return nil, fmt.Errorf("bad iv in meta %q", v)
   524  	}
   525  	var ok bool
   526  	mv.EncBlobRef, ok = blob.Parse(f[2])
   527  	if !ok {
   528  		return nil, fmt.Errorf("bad blobref in meta %q", v)
   529  	}
   530  	mv.EncSize, err = strconv.ParseInt(f[3], 10, 64)
   531  	if err != nil {
   532  		return nil, fmt.Errorf("bad encrypted size in meta %q", v)
   533  	}
   534  	return mv, nil
   535  }
   536  
// dummyCloser is an io.Closer whose Close does nothing. FetchStreaming
// pairs it with an in-memory reader to form an io.ReadCloser.
var dummyCloser io.Closer = ioutil.NopCloser(nil)
   538  
// init registers newFromConfig as the constructor for the "encrypt"
// storage type.
func init() {
	blobserver.RegisterStorageConstructor("encrypt", blobserver.StorageConstructor(newFromConfig))
}
   542  
   543  func newFromConfig(ld blobserver.Loader, config jsonconfig.Obj) (bs blobserver.Storage, err error) {
   544  	metaConf := config.RequiredObject("metaIndex")
   545  	sto := &storage{}
   546  	agreement := config.OptionalString("I_AGREE", "")
   547  	const wantAgreement = "that encryption support hasn't been peer-reviewed, isn't finished, and its format might change."
   548  	if agreement != wantAgreement {
   549  		return nil, errors.New("Use of the 'encrypt' target without the proper I_AGREE value.")
   550  	}
   551  
   552  	key := config.OptionalString("key", "")
   553  	keyFile := config.OptionalString("keyFile", "")
   554  	var keyb []byte
   555  	switch {
   556  	case key != "":
   557  		keyb, err = hex.DecodeString(key)
   558  		if err != nil || len(keyb) != 16 {
   559  			return nil, fmt.Errorf("The 'key' parameter must be 16 bytes of 32 hex digits. (currently fixed at AES-128)")
   560  		}
   561  	case keyFile != "":
   562  		// TODO: check that keyFile's unix permissions aren't too permissive.
   563  		keyb, err = ioutil.ReadFile(keyFile)
   564  		if err != nil {
   565  			return nil, fmt.Errorf("Reading key file %v: %v", keyFile, err)
   566  		}
   567  	}
   568  	blobStorage := config.RequiredString("blobs")
   569  	metaStorage := config.RequiredString("meta")
   570  	if err := config.Validate(); err != nil {
   571  		return nil, err
   572  	}
   573  
   574  	sto.index, err = sorted.NewKeyValue(metaConf)
   575  	if err != nil {
   576  		return
   577  	}
   578  
   579  	sto.blobs, err = ld.GetStorage(blobStorage)
   580  	if err != nil {
   581  		return
   582  	}
   583  	sto.meta, err = ld.GetStorage(metaStorage)
   584  	if err != nil {
   585  		return
   586  	}
   587  
   588  	if keyb == nil {
   589  		// TODO: add a way to prompt from stdin on start? or keychain support?
   590  		return nil, errors.New("no encryption key set with 'key' or 'keyFile'")
   591  	}
   592  
   593  	if err := sto.setKey(keyb); err != nil {
   594  		return nil, err
   595  	}
   596  
   597  	log.Printf("Reading encryption metadata...")
   598  	if err := sto.readAllMetaBlobs(); err != nil {
   599  		return nil, fmt.Errorf("Error scanning metadata on start-up: %v", err)
   600  	}
   601  	log.Printf("Read all encryption metadata.")
   602  
   603  	return sto, nil
   604  }