github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/pkg/blobserver/encrypt/encrypt.go (about)

     1  /*
     2  Copyright 2013 Google Inc.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8       http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  // Package encrypt registers the "encrypt" blobserver storage type
    18  // which stores all blobs and metadata with AES encryption into other
    19  // wrapped storage targets (e.g. localdisk, s3, remote, google).
    20  //
    21  // An encrypt storage target is configured with two other storage targets:
    22  // one to hold encrypted blobs, and one to hold encrypted metadata about
    23  // the encrypted blobs. On start-up, all the metadata blobs are read
    24  // to discover the plaintext blobrefs.
    25  //
    26  // Encryption is currently always AES-128.  See code for metadata formats
    27  // and configuration details, which are currently subject to change.
    28  //
    29  // WARNING: work in progress as of 2013-07-13.
    30  package encrypt
    31  
    32  import (
    33  	"bufio"
    34  	"bytes"
    35  	"container/heap"
    36  	"crypto/aes"
    37  	"crypto/cipher"
    38  	"crypto/rand"
    39  	"crypto/sha1"
    40  	"encoding/hex"
    41  	"errors"
    42  	"fmt"
    43  	"io"
    44  	"io/ioutil"
    45  	"log"
    46  	"os"
    47  	"strconv"
    48  	"strings"
    49  	"sync"
    50  	"time"
    51  
    52  	"camlistore.org/pkg/blob"
    53  	"camlistore.org/pkg/blobserver"
    54  	"camlistore.org/pkg/context"
    55  	"camlistore.org/pkg/jsonconfig"
    56  	"camlistore.org/pkg/sorted"
    57  	"camlistore.org/pkg/types"
    58  )
    59  
    60  // Compaction constants
    61  const (
    62  	// FullMetaBlobSize is the size at which we stop compacting
    63  	// a meta blob.
    64  	FullMetaBlobSize = 512 << 10
    65  )
    66  
    67  /*
    68  Dev notes:
    69  
    70  $ devcam put --path=/enc/ blob dev-camput
    71  sha1-282c0feceeb5cdf4c5086c191b15356fadfb2392
    72  $ devcam get --path=/enc/ sha1-282c0feceeb5cdf4c5086c191b15356fadfb2392
    73  $ find /tmp/camliroot-$USER/port3179/encblob/
    74  $ ./dev-camtool sync --src=http://localhost:3179/enc/ --dest=stdout
    75  
    76  */
    77  
    78  // TODO:
    79  // http://godoc.org/code.google.com/p/go.crypto/scrypt
    80  
// storage is the state behind the "encrypt" storage target: an index of
// plaintext blobrefs, the AES cipher, and the two wrapped storage targets
// that hold the encrypted blobs and the encrypted metadata.
type storage struct {
	// index is the meta index.
	// it's keyed by plaintext blobref.
	// the value is the meta key (encodeMetaValue)
	index sorted.KeyValue

	// Encryption key.
	key   []byte
	block cipher.Block // aes.NewCipher(key)

	// blobs holds encrypted versions of all plaintext blobs.
	blobs blobserver.Storage

	// meta holds metadata mapping between the names of plaintext
	// blobs and their after-encryption name, as well as their
	// IV. Each blob in meta contains 1 or more blob
	// description. All new insertions generate both a new
	// encrypted blob in 'blobs' and one single-meta blob in
	// 'meta'. The small metadata blobs are occasionally rolled up
	// into bigger blobs with multiple blob descriptions.
	meta blobserver.Storage

	// TODO(bradfitz): finish metadata compaction
	/*
		// mu guards the following
		mu sync.Mutex
		// toDelete are the meta blobrefs that are no longer
		// necessary, as they're subsets of others.
		toDelete []blob.Ref
		// plainIn maps from a plaintext blobref to its currently-largest-describing metablob.
		plainIn map[string]*metaBlobInfo
		// smallMeta tracks a heap of meta blobs, sorted by their encrypted size
		smallMeta metaBlobHeap
	*/

	// Hooks for testing
	// testRandIV, when non-nil, replaces the random IV source used by randIV.
	testRandIV func() []byte
}
   119  
   120  func (s *storage) setKey(key []byte) error {
   121  	var err error
   122  	s.block, err = aes.NewCipher(key)
   123  	if err != nil {
   124  		return fmt.Errorf("The key must be exactly 16 bytes (currently only AES-128 is supported): %v", err)
   125  	}
   126  	s.key = key
   127  	return nil
   128  }
   129  
// metaBlobInfo records what is known about a single meta blob: its own
// blobref, its (encrypted) size in bytes, and the plaintext blobrefs it
// describes.
type metaBlobInfo struct {
	br     blob.Ref // of meta blob
	n      int      // size of meta blob
	// plains lists the plaintext blobrefs parsed from the meta blob's lines.
	plains []blob.Ref
}
   135  
   136  type metaBlobHeap []*metaBlobInfo
   137  
   138  var _ heap.Interface = (*metaBlobHeap)(nil)
   139  
   140  func (s *metaBlobHeap) Push(x interface{}) {
   141  	*s = append(*s, x.(*metaBlobInfo))
   142  }
   143  
   144  func (s *metaBlobHeap) Pop() interface{} {
   145  	l := s.Len()
   146  	v := (*s)[l]
   147  	*s = (*s)[:l-1]
   148  	return v
   149  }
   150  
   151  func (s *metaBlobHeap) Len() int { return len(*s) }
   152  func (s *metaBlobHeap) Less(i, j int) bool {
   153  	sl := *s
   154  	v := sl[i].n < sl[j].n
   155  	if !v && sl[i].n == sl[j].n {
   156  		v = sl[i].br.String() < sl[j].br.String()
   157  	}
   158  	return v
   159  }
   160  
   161  func (s *metaBlobHeap) Swap(i, j int) { (*s)[i], (*s)[j] = (*s)[j], (*s)[i] }
   162  
   163  func (s *storage) randIV() []byte {
   164  	if f := s.testRandIV; f != nil {
   165  		return f()
   166  	}
   167  	iv := make([]byte, s.block.BlockSize())
   168  	n, err := rand.Read(iv)
   169  	if err != nil {
   170  		panic(err)
   171  	}
   172  	if n != len(iv) {
   173  		panic("short read from crypto/rand")
   174  	}
   175  	return iv
   176  }
   177  
   178  /*
   179  Meta format:
   180     <16 bytes of IV> (for AES-128)
   181     <20 bytes of SHA-1 of plaintext>
   182     <encrypted>
   183  
   184  Where encrypted has plaintext of:
   185     #camlistore/encmeta=1
   186  Then sorted lines, each ending in a newline, like:
   187     sha1-plain/<metaValue>
   188  See the encodeMetaValue for the definition of metaValue, but in summary:
   189     sha1-plain/<plaintext size>/<iv as %x>/sha1-encrypted/<encrypted size>
   190  */
   191  
   192  func (s *storage) makeSingleMetaBlob(plainBR blob.Ref, meta string) []byte {
   193  	iv := s.randIV()
   194  
   195  	var plain bytes.Buffer
   196  	plain.WriteString("#camlistore/encmeta=1\n")
   197  	plain.WriteString(plainBR.String())
   198  	plain.WriteByte('/')
   199  	plain.WriteString(meta)
   200  	plain.WriteByte('\n')
   201  
   202  	s1 := sha1.New()
   203  	s1.Write(plain.Bytes())
   204  
   205  	var final bytes.Buffer
   206  	final.Grow(len(iv) + sha1.Size + plain.Len())
   207  	final.Write(iv)
   208  	final.Write(s1.Sum(final.Bytes()[len(iv):]))
   209  
   210  	_, err := io.Copy(cipher.StreamWriter{S: cipher.NewCTR(s.block, iv), W: &final}, &plain)
   211  	if err != nil {
   212  		panic(err)
   213  	}
   214  	return final.Bytes()
   215  }
   216  
// RemoveBlobs is not implemented yet: calling it always panics.
func (s *storage) RemoveBlobs(blobs []blob.Ref) error {
	panic("TODO: implement")
}
   220  
   221  func (s *storage) StatBlobs(dest chan<- blob.SizedRef, blobs []blob.Ref) error {
   222  	for _, br := range blobs {
   223  		v, err := s.index.Get(br.String())
   224  		if err == sorted.ErrNotFound {
   225  			continue
   226  		}
   227  		if err != nil {
   228  			return err
   229  		}
   230  		plainSize, ok := parseMetaValuePlainSize(v)
   231  		if !ok {
   232  			continue
   233  		}
   234  		if err != nil {
   235  			continue
   236  		}
   237  		dest <- blob.SizedRef{br, plainSize}
   238  	}
   239  	return nil
   240  }
   241  
   242  func (s *storage) ReceiveBlob(plainBR blob.Ref, source io.Reader) (sb blob.SizedRef, err error) {
   243  	iv := s.randIV()
   244  	stream := cipher.NewCTR(s.block, iv)
   245  
   246  	hash := plainBR.Hash()
   247  	var buf bytes.Buffer
   248  	// TODO: compress before encrypting?
   249  	buf.Write(iv) // TODO: write more structured header w/ version & IV length? or does that weaken it?
   250  	sw := cipher.StreamWriter{S: stream, W: &buf}
   251  	plainSize, err := io.Copy(io.MultiWriter(sw, hash), source)
   252  	if err != nil {
   253  		return sb, err
   254  	}
   255  	if !plainBR.HashMatches(hash) {
   256  		return sb, blobserver.ErrCorruptBlob
   257  	}
   258  
   259  	encBR := blob.SHA1FromBytes(buf.Bytes())
   260  	_, err = blobserver.Receive(s.blobs, encBR, bytes.NewReader(buf.Bytes()))
   261  	if err != nil {
   262  		log.Printf("encrypt: error writing encrypted blob %v (plaintext %v): %v", encBR, plainBR, err)
   263  		return sb, errors.New("encrypt: error writing encrypted blob")
   264  	}
   265  
   266  	meta := encodeMetaValue(uint32(plainSize), iv, encBR, buf.Len())
   267  	metaBlob := s.makeSingleMetaBlob(plainBR, meta)
   268  	_, err = blobserver.ReceiveNoHash(s.meta, blob.SHA1FromBytes(metaBlob), bytes.NewReader(metaBlob))
   269  	if err != nil {
   270  		log.Printf("encrypt: error writing encrypted meta for plaintext %v (encrypted blob %v): %v", plainBR, encBR, err)
   271  		return sb, errors.New("encrypt: error writing encrypted meta")
   272  	}
   273  
   274  	err = s.index.Set(plainBR.String(), meta)
   275  	if err != nil {
   276  		return sb, fmt.Errorf("encrypt: error updating index for encrypted %v (plaintext %v): %v", err)
   277  	}
   278  
   279  	return blob.SizedRef{plainBR, uint32(plainSize)}, nil
   280  }
   281  
   282  func (s *storage) Fetch(plainBR blob.Ref) (file io.ReadCloser, size uint32, err error) {
   283  	meta, err := s.fetchMeta(plainBR)
   284  	if err != nil {
   285  		return nil, 0, err
   286  	}
   287  	encData, _, err := s.blobs.Fetch(meta.EncBlobRef)
   288  	if err != nil {
   289  		log.Printf("encrypt: plaintext %s's encrypted %v blob not found", plainBR, meta.EncBlobRef)
   290  		return
   291  	}
   292  	defer encData.Close()
   293  
   294  	// Quick sanity check that the blob begins with the same IV we
   295  	// have in our metadata.
   296  	blobIV := make([]byte, len(meta.IV))
   297  	_, err = io.ReadFull(encData, blobIV)
   298  	if err != nil {
   299  		return nil, 0, fmt.Errorf("Error reading off IV header from blob: %v", err)
   300  	}
   301  	if !bytes.Equal(blobIV, meta.IV) {
   302  		return nil, 0, fmt.Errorf("Blob and meta IV don't match")
   303  	}
   304  
   305  	// Slurp the whole blob into memory to validate its plaintext
   306  	// checksum (no tampered bits) before returning it. Clients
   307  	// should be the party doing this in the general case, but
   308  	// we'll be extra paranoid and always do it here, at the cost
   309  	// of sometimes having it be done twice.
   310  	var plain bytes.Buffer
   311  	plainHash := plainBR.Hash()
   312  	plainSize, err := io.Copy(io.MultiWriter(&plain, plainHash), cipher.StreamReader{
   313  		S: cipher.NewCTR(s.block, meta.IV),
   314  		R: encData,
   315  	})
   316  	if err != nil {
   317  		return nil, 0, err
   318  	}
   319  	size = types.U32(plainSize)
   320  	if !plainBR.HashMatches(plainHash) {
   321  		return nil, 0, blobserver.ErrCorruptBlob
   322  	}
   323  	return struct {
   324  		*bytes.Reader
   325  		io.Closer
   326  	}{
   327  		bytes.NewReader(plain.Bytes()),
   328  		types.NopCloser,
   329  	}, uint32(plainSize), nil
   330  }
   331  
   332  func (s *storage) EnumerateBlobs(ctx *context.Context, dest chan<- blob.SizedRef, after string, limit int) error {
   333  	defer close(dest)
   334  	iter := s.index.Find(after, "")
   335  	n := 0
   336  	for iter.Next() {
   337  		if iter.Key() == after {
   338  			continue
   339  		}
   340  		br, ok := blob.Parse(iter.Key())
   341  		if !ok {
   342  			panic("Bogus encrypt index key: " + iter.Key())
   343  		}
   344  		plainSize, ok := parseMetaValuePlainSize(iter.Value())
   345  		if !ok {
   346  			panic("Bogus encrypt index value: " + iter.Value())
   347  		}
   348  		select {
   349  		case dest <- blob.SizedRef{br, plainSize}:
   350  		case <-ctx.Done():
   351  			return context.ErrCanceled
   352  		}
   353  		n++
   354  		if limit != 0 && n >= limit {
   355  			break
   356  		}
   357  	}
   358  	return iter.Close()
   359  }
   360  
   361  // processEncryptedMetaBlob decrypts dat (the data for the br meta blob) and parses
   362  // its meta lines, updating the index.
   363  //
   364  // processEncryptedMetaBlob is not thread-safe.
   365  func (s *storage) processEncryptedMetaBlob(br blob.Ref, dat []byte) error {
   366  	mi := &metaBlobInfo{
   367  		br: br,
   368  		n:  len(dat),
   369  	}
   370  	log.Printf("processing meta blob %v: %d bytes", br, len(dat))
   371  	ivSize := s.block.BlockSize()
   372  	if len(dat) < ivSize+sha1.Size {
   373  		return errors.New("data size is smaller than IV + SHA-1")
   374  	}
   375  	var (
   376  		iv       = dat[:ivSize]
   377  		wantHash = dat[ivSize : ivSize+sha1.Size]
   378  		enc      = dat[ivSize+sha1.Size:]
   379  	)
   380  	plain := bytes.NewBuffer(make([]byte, 0, len(dat)))
   381  	io.Copy(plain, cipher.StreamReader{
   382  		S: cipher.NewCTR(s.block, iv),
   383  		R: bytes.NewReader(enc),
   384  	})
   385  	s1 := sha1.New()
   386  	s1.Write(plain.Bytes())
   387  	if !bytes.Equal(wantHash, s1.Sum(nil)) {
   388  		return errors.New("hash of encrypted data doesn't match")
   389  	}
   390  	sc := bufio.NewScanner(plain)
   391  	if !sc.Scan() {
   392  		return errors.New("No first line")
   393  	}
   394  	if sc.Text() != "#camlistore/encmeta=1" {
   395  		line := sc.Text()
   396  		if len(line) > 80 {
   397  			line = line[:80]
   398  		}
   399  		return fmt.Errorf("unsupported first line %q", line)
   400  	}
   401  	for sc.Scan() {
   402  		line := sc.Text()
   403  		slash := strings.Index(line, "/")
   404  		if slash < 0 {
   405  			return errors.New("no slash in metaline")
   406  		}
   407  		plainBR, meta := line[:slash], line[slash+1:]
   408  		log.Printf("Adding meta: %q = %q", plainBR, meta)
   409  		mi.plains = append(mi.plains, blob.ParseOrZero(plainBR))
   410  		if err := s.index.Set(plainBR, meta); err != nil {
   411  			return err
   412  		}
   413  	}
   414  	return sc.Err()
   415  }
   416  
// readAllMetaBlobs enumerates every blob in s.meta, fetches them
// concurrently, and feeds each one to processEncryptedMetaBlob to populate
// the index. The first fetch or processing error stops the scan and is
// returned; otherwise the result of the enumeration itself is returned.
func (s *storage) readAllMetaBlobs() error {
	// metaBlob is one fetched meta blob, or the error from fetching it.
	type metaBlob struct {
		br  blob.Ref
		dat []byte // encrypted blob
		err error
	}
	// metac carries fetched meta blobs from the fetch goroutines to the
	// processing loop below.
	metac := make(chan metaBlob, 16)

	// gate bounds the number of fetch goroutines running at once: a slot is
	// taken before each goroutine starts and released when it finishes.
	const maxInFlight = 50
	var gate = make(chan bool, maxInFlight)

	var stopEnumerate = make(chan bool) // closed on error
	// enumErrc is buffered so the enumerating goroutine can deliver its
	// result even when nobody receives it (the early-error return below).
	enumErrc := make(chan error, 1)
	go func() {
		var wg sync.WaitGroup
		enumErrc <- blobserver.EnumerateAll(context.TODO(), s.meta, func(sb blob.SizedRef) error {
			// Non-blocking check: abort the enumeration once the
			// processing loop has reported an error.
			select {
			case <-stopEnumerate:
				return errors.New("enumeration stopped")
			default:
			}

			wg.Add(1)
			gate <- true
			go func() {
				defer wg.Done()
				defer func() { <-gate }()
				rc, _, err := s.meta.Fetch(sb.Ref)
				var all []byte
				if err == nil {
					all, err = ioutil.ReadAll(rc)
					rc.Close()
				}
				metac <- metaBlob{sb.Ref, all, err}
			}()
			return nil
		})
		// Only close metac after every in-flight fetch has sent its result.
		wg.Wait()
		close(metac)
	}()

	// Meta blobs are processed one at a time, on this goroutine only.
	for mi := range metac {
		err := mi.err
		if err == nil {
			err = s.processEncryptedMetaBlob(mi.br, mi.dat)
		}
		if err != nil {
			close(stopEnumerate)
			// Keep draining metac in the background so fetch goroutines
			// blocked on their send can finish and metac can be closed.
			go func() {
				for _ = range metac {
				}
			}()
			// TODO: advertise in this error message a new option or environment variable
			// to skip a certain or all meta blobs, to allow partial recovery, if some
			// are corrupt. For now, require all to be correct.
			return fmt.Errorf("Error with meta blob %v: %v", mi.br, err)
		}
	}

	return <-enumErrc
}
   478  
// encodeMetaValue formats the meta value stored in the index and in meta
// blob lines:
//
//	<plaintext size>/<iv as hex>/<encrypted blobref>/<encrypted size>
//
// parseMetaValue is the inverse.
func encodeMetaValue(plainSize uint32, iv []byte, encBR blob.Ref, encSize int) string {
	return fmt.Sprintf("%d/%x/%s/%d", plainSize, iv, encBR, encSize)
}
   482  
// metaValue is the parsed form of a meta value string (see
// encodeMetaValue and parseMetaValue).
type metaValue struct {
	// IV is the initialization vector, decoded from its hex form.
	IV []byte
	// EncBlobRef is the blobref of the encrypted blob.
	EncBlobRef blob.Ref
	// EncSize is the size of the encrypted blob.
	EncSize uint32
	// PlainSize is the size of the plaintext blob.
	PlainSize uint32
}
   489  
   490  // returns os.ErrNotExist on cache miss
   491  func (s *storage) fetchMeta(b blob.Ref) (*metaValue, error) {
   492  	v, err := s.index.Get(b.String())
   493  	if err == sorted.ErrNotFound {
   494  		err = os.ErrNotExist
   495  	}
   496  	if err != nil {
   497  		return nil, err
   498  	}
   499  	return parseMetaValue(v)
   500  }
   501  
   502  func parseMetaValuePlainSize(v string) (plainSize uint32, ok bool) {
   503  	slash := strings.Index(v, "/")
   504  	if slash < 0 {
   505  		return
   506  	}
   507  	n, err := strconv.ParseUint(v[:slash], 10, 32)
   508  	if err != nil {
   509  		return
   510  	}
   511  	return uint32(n), true
   512  }
   513  
   514  func parseMetaValue(v string) (mv *metaValue, err error) {
   515  	f := strings.Split(v, "/")
   516  	if len(f) != 4 {
   517  		return nil, errors.New("wrong number of fields")
   518  	}
   519  	mv = &metaValue{}
   520  	plainSize, err := strconv.ParseUint(f[0], 10, 32)
   521  	if err != nil {
   522  		return nil, fmt.Errorf("bad plaintext size in meta %q", v)
   523  	}
   524  	mv.PlainSize = uint32(plainSize)
   525  	mv.IV, err = hex.DecodeString(f[1])
   526  	if err != nil {
   527  		return nil, fmt.Errorf("bad iv in meta %q", v)
   528  	}
   529  	var ok bool
   530  	mv.EncBlobRef, ok = blob.Parse(f[2])
   531  	if !ok {
   532  		return nil, fmt.Errorf("bad blobref in meta %q", v)
   533  	}
   534  	encSize, err := strconv.ParseUint(f[3], 10, 32)
   535  	if err != nil {
   536  		return nil, fmt.Errorf("bad encrypted size in meta %q", v)
   537  	}
   538  	mv.EncSize = uint32(encSize)
   539  	return mv, nil
   540  }
   541  
// init registers newFromConfig as the constructor for the "encrypt"
// storage type.
func init() {
	blobserver.RegisterStorageConstructor("encrypt", blobserver.StorageConstructor(newFromConfig))
}
   545  
   546  func newFromConfig(ld blobserver.Loader, config jsonconfig.Obj) (bs blobserver.Storage, err error) {
   547  	metaConf := config.RequiredObject("metaIndex")
   548  	sto := &storage{}
   549  	agreement := config.OptionalString("I_AGREE", "")
   550  	const wantAgreement = "that encryption support hasn't been peer-reviewed, isn't finished, and its format might change."
   551  	if agreement != wantAgreement {
   552  		return nil, errors.New("Use of the 'encrypt' target without the proper I_AGREE value.")
   553  	}
   554  
   555  	key := config.OptionalString("key", "")
   556  	keyFile := config.OptionalString("keyFile", "")
   557  	var keyb []byte
   558  	switch {
   559  	case key != "":
   560  		keyb, err = hex.DecodeString(key)
   561  		if err != nil || len(keyb) != 16 {
   562  			return nil, fmt.Errorf("The 'key' parameter must be 16 bytes of 32 hex digits. (currently fixed at AES-128)")
   563  		}
   564  	case keyFile != "":
   565  		// TODO: check that keyFile's unix permissions aren't too permissive.
   566  		keyb, err = ioutil.ReadFile(keyFile)
   567  		if err != nil {
   568  			return nil, fmt.Errorf("Reading key file %v: %v", keyFile, err)
   569  		}
   570  	}
   571  	blobStorage := config.RequiredString("blobs")
   572  	metaStorage := config.RequiredString("meta")
   573  	if err := config.Validate(); err != nil {
   574  		return nil, err
   575  	}
   576  
   577  	sto.index, err = sorted.NewKeyValue(metaConf)
   578  	if err != nil {
   579  		return
   580  	}
   581  
   582  	sto.blobs, err = ld.GetStorage(blobStorage)
   583  	if err != nil {
   584  		return
   585  	}
   586  	sto.meta, err = ld.GetStorage(metaStorage)
   587  	if err != nil {
   588  		return
   589  	}
   590  
   591  	if keyb == nil {
   592  		// TODO: add a way to prompt from stdin on start? or keychain support?
   593  		return nil, errors.New("no encryption key set with 'key' or 'keyFile'")
   594  	}
   595  
   596  	if err := sto.setKey(keyb); err != nil {
   597  		return nil, err
   598  	}
   599  
   600  	start := time.Now()
   601  	log.Printf("Reading encryption metadata...")
   602  	if err := sto.readAllMetaBlobs(); err != nil {
   603  		return nil, fmt.Errorf("Error scanning metadata on start-up: %v", err)
   604  	}
   605  	log.Printf("Read all encryption metadata in %.3f seconds", time.Since(start).Seconds())
   606  
   607  	return sto, nil
   608  }