github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/pkg/blobserver/encrypt/encrypt.go (about) 1 /* 2 Copyright 2013 Google Inc. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 // Package encrypt registers the "encrypt" blobserver storage type 18 // which stores all blobs and metadata with AES encryption into other 19 // wrapped storage targets (e.g. localdisk, s3, remote, google). 20 // 21 // An encrypt storage target is configured with two other storage targets: 22 // one to hold encrypted blobs, and one to hold encrypted metadata about 23 // the encrypted blobs. On start-up, all the metadata blobs are read 24 // to discover the plaintext blobrefs. 25 // 26 // Encryption is currently always AES-128. See code for metadata formats 27 // and configuration details, which are currently subject to change. 28 // 29 // WARNING: work in progress as of 2013-07-13. 30 package encrypt 31 32 import ( 33 "bufio" 34 "bytes" 35 "container/heap" 36 "crypto/aes" 37 "crypto/cipher" 38 "crypto/rand" 39 "crypto/sha1" 40 "encoding/hex" 41 "errors" 42 "fmt" 43 "io" 44 "io/ioutil" 45 "log" 46 "os" 47 "strconv" 48 "strings" 49 "sync" 50 "time" 51 52 "camlistore.org/pkg/blob" 53 "camlistore.org/pkg/blobserver" 54 "camlistore.org/pkg/context" 55 "camlistore.org/pkg/jsonconfig" 56 "camlistore.org/pkg/sorted" 57 "camlistore.org/pkg/types" 58 ) 59 60 // Compaction constants 61 const ( 62 // FullMetaBlobSize is the size at which we stop compacting 63 // a meta blob. 64 FullMetaBlobSize = 512 << 10 65 ) 66 67 /* 68 Dev notes: 69 70 $ devcam put --path=/enc/ blob dev-camput 71 sha1-282c0feceeb5cdf4c5086c191b15356fadfb2392 72 $ devcam get --path=/enc/ sha1-282c0feceeb5cdf4c5086c191b15356fadfb2392 73 $ find /tmp/camliroot-$USER/port3179/encblob/ 74 $ ./dev-camtool sync --src=http://localhost:3179/enc/ --dest=stdout 75 76 */ 77 78 // TODO: 79 // http://godoc.org/code.google.com/p/go.crypto/scrypt 80 81 type storage struct { 82 // index is the meta index. 83 // it's keyed by plaintext blobref. 84 // the value is the meta key (encodeMetaValue) 85 index sorted.KeyValue 86 87 // Encryption key. 88 key []byte 89 block cipher.Block // aes.NewCipher(key) 90 91 // blobs holds encrypted versions of all plaintext blobs. 92 blobs blobserver.Storage 93 94 // meta holds metadata mapping between the names of plaintext 95 // blobs and their after-encryption name, as well as their 96 // IV. Each blob in meta contains 1 or more blob 97 // description. All new insertions generate both a new 98 // encrypted blob in 'blobs' and one single-meta blob in 99 // 'meta'. The small metadata blobs are occasionally rolled up 100 // into bigger blobs with multiple blob descriptions. 101 meta blobserver.Storage 102 103 // TODO(bradfitz): finish metdata compaction 104 /* 105 // mu guards the following 106 mu sync.Mutex 107 // toDelete are the meta blobrefs that are no longer 108 // necessary, as they're subsets of others. 109 toDelete []blob.Ref 110 // plainIn maps from a plaintext blobref to its currently-largest-describing metablob. 111 plainIn map[string]*metaBlobInfo 112 // smallMeta tracks a heap of meta blobs, sorted by their encrypted size 113 smallMeta metaBlobHeap 114 */ 115 116 // Hooks for testing 117 testRandIV func() []byte 118 } 119 120 func (s *storage) setKey(key []byte) error { 121 var err error 122 s.block, err = aes.NewCipher(key) 123 if err != nil { 124 return fmt.Errorf("The key must be exactly 16 bytes (currently only AES-128 is supported): %v", err) 125 } 126 s.key = key 127 return nil 128 } 129 130 type metaBlobInfo struct { 131 br blob.Ref // of meta blob 132 n int // size of meta blob 133 plains []blob.Ref 134 } 135 136 type metaBlobHeap []*metaBlobInfo 137 138 var _ heap.Interface = (*metaBlobHeap)(nil) 139 140 func (s *metaBlobHeap) Push(x interface{}) { 141 *s = append(*s, x.(*metaBlobInfo)) 142 } 143 144 func (s *metaBlobHeap) Pop() interface{} { 145 l := s.Len() 146 v := (*s)[l] 147 *s = (*s)[:l-1] 148 return v 149 } 150 151 func (s *metaBlobHeap) Len() int { return len(*s) } 152 func (s *metaBlobHeap) Less(i, j int) bool { 153 sl := *s 154 v := sl[i].n < sl[j].n 155 if !v && sl[i].n == sl[j].n { 156 v = sl[i].br.String() < sl[j].br.String() 157 } 158 return v 159 } 160 161 func (s *metaBlobHeap) Swap(i, j int) { (*s)[i], (*s)[j] = (*s)[j], (*s)[i] } 162 163 func (s *storage) randIV() []byte { 164 if f := s.testRandIV; f != nil { 165 return f() 166 } 167 iv := make([]byte, s.block.BlockSize()) 168 n, err := rand.Read(iv) 169 if err != nil { 170 panic(err) 171 } 172 if n != len(iv) { 173 panic("short read from crypto/rand") 174 } 175 return iv 176 } 177 178 /* 179 Meta format: 180 <16 bytes of IV> (for AES-128) 181 <20 bytes of SHA-1 of plaintext> 182 <encrypted> 183 184 Where encrypted has plaintext of: 185 #camlistore/encmeta=1 186 Then sorted lines, each ending in a newline, like: 187 sha1-plain/<metaValue> 188 See the encodeMetaValue for the definition of metaValue, but in summary: 189 sha1-plain/<plaintext size>/<iv as %x>/sha1-encrypted/<encrypted size> 190 */ 191 192 func (s *storage) makeSingleMetaBlob(plainBR blob.Ref, meta string) []byte { 193 iv := s.randIV() 194 195 var plain bytes.Buffer 196 plain.WriteString("#camlistore/encmeta=1\n") 197 plain.WriteString(plainBR.String()) 198 plain.WriteByte('/') 199 plain.WriteString(meta) 200 plain.WriteByte('\n') 201 202 s1 := sha1.New() 203 s1.Write(plain.Bytes()) 204 205 var final bytes.Buffer 206 final.Grow(len(iv) + sha1.Size + plain.Len()) 207 final.Write(iv) 208 final.Write(s1.Sum(final.Bytes()[len(iv):])) 209 210 _, err := io.Copy(cipher.StreamWriter{S: cipher.NewCTR(s.block, iv), W: &final}, &plain) 211 if err != nil { 212 panic(err) 213 } 214 return final.Bytes() 215 } 216 217 func (s *storage) RemoveBlobs(blobs []blob.Ref) error { 218 panic("TODO: implement") 219 } 220 221 func (s *storage) StatBlobs(dest chan<- blob.SizedRef, blobs []blob.Ref) error { 222 for _, br := range blobs { 223 v, err := s.index.Get(br.String()) 224 if err == sorted.ErrNotFound { 225 continue 226 } 227 if err != nil { 228 return err 229 } 230 plainSize, ok := parseMetaValuePlainSize(v) 231 if !ok { 232 continue 233 } 234 if err != nil { 235 continue 236 } 237 dest <- blob.SizedRef{br, plainSize} 238 } 239 return nil 240 } 241 242 func (s *storage) ReceiveBlob(plainBR blob.Ref, source io.Reader) (sb blob.SizedRef, err error) { 243 iv := s.randIV() 244 stream := cipher.NewCTR(s.block, iv) 245 246 hash := plainBR.Hash() 247 var buf bytes.Buffer 248 // TODO: compress before encrypting? 249 buf.Write(iv) // TODO: write more structured header w/ version & IV length? or does that weaken it? 250 sw := cipher.StreamWriter{S: stream, W: &buf} 251 plainSize, err := io.Copy(io.MultiWriter(sw, hash), source) 252 if err != nil { 253 return sb, err 254 } 255 if !plainBR.HashMatches(hash) { 256 return sb, blobserver.ErrCorruptBlob 257 } 258 259 encBR := blob.SHA1FromBytes(buf.Bytes()) 260 _, err = blobserver.Receive(s.blobs, encBR, bytes.NewReader(buf.Bytes())) 261 if err != nil { 262 log.Printf("encrypt: error writing encrypted blob %v (plaintext %v): %v", encBR, plainBR, err) 263 return sb, errors.New("encrypt: error writing encrypted blob") 264 } 265 266 meta := encodeMetaValue(uint32(plainSize), iv, encBR, buf.Len()) 267 metaBlob := s.makeSingleMetaBlob(plainBR, meta) 268 _, err = blobserver.ReceiveNoHash(s.meta, blob.SHA1FromBytes(metaBlob), bytes.NewReader(metaBlob)) 269 if err != nil { 270 log.Printf("encrypt: error writing encrypted meta for plaintext %v (encrypted blob %v): %v", plainBR, encBR, err) 271 return sb, errors.New("encrypt: error writing encrypted meta") 272 } 273 274 err = s.index.Set(plainBR.String(), meta) 275 if err != nil { 276 return sb, fmt.Errorf("encrypt: error updating index for encrypted %v (plaintext %v): %v", err) 277 } 278 279 return blob.SizedRef{plainBR, uint32(plainSize)}, nil 280 } 281 282 func (s *storage) Fetch(plainBR blob.Ref) (file io.ReadCloser, size uint32, err error) { 283 meta, err := s.fetchMeta(plainBR) 284 if err != nil { 285 return nil, 0, err 286 } 287 encData, _, err := s.blobs.Fetch(meta.EncBlobRef) 288 if err != nil { 289 log.Printf("encrypt: plaintext %s's encrypted %v blob not found", plainBR, meta.EncBlobRef) 290 return 291 } 292 defer encData.Close() 293 294 // Quick sanity check that the blob begins with the same IV we 295 // have in our metadata. 296 blobIV := make([]byte, len(meta.IV)) 297 _, err = io.ReadFull(encData, blobIV) 298 if err != nil { 299 return nil, 0, fmt.Errorf("Error reading off IV header from blob: %v", err) 300 } 301 if !bytes.Equal(blobIV, meta.IV) { 302 return nil, 0, fmt.Errorf("Blob and meta IV don't match") 303 } 304 305 // Slurp the whole blob into memory to validate its plaintext 306 // checksum (no tampered bits) before returning it. Clients 307 // should be the party doing this in the general case, but 308 // we'll be extra paranoid and always do it here, at the cost 309 // of sometimes having it be done twice. 310 var plain bytes.Buffer 311 plainHash := plainBR.Hash() 312 plainSize, err := io.Copy(io.MultiWriter(&plain, plainHash), cipher.StreamReader{ 313 S: cipher.NewCTR(s.block, meta.IV), 314 R: encData, 315 }) 316 if err != nil { 317 return nil, 0, err 318 } 319 size = types.U32(plainSize) 320 if !plainBR.HashMatches(plainHash) { 321 return nil, 0, blobserver.ErrCorruptBlob 322 } 323 return struct { 324 *bytes.Reader 325 io.Closer 326 }{ 327 bytes.NewReader(plain.Bytes()), 328 types.NopCloser, 329 }, uint32(plainSize), nil 330 } 331 332 func (s *storage) EnumerateBlobs(ctx *context.Context, dest chan<- blob.SizedRef, after string, limit int) error { 333 defer close(dest) 334 iter := s.index.Find(after, "") 335 n := 0 336 for iter.Next() { 337 if iter.Key() == after { 338 continue 339 } 340 br, ok := blob.Parse(iter.Key()) 341 if !ok { 342 panic("Bogus encrypt index key: " + iter.Key()) 343 } 344 plainSize, ok := parseMetaValuePlainSize(iter.Value()) 345 if !ok { 346 panic("Bogus encrypt index value: " + iter.Value()) 347 } 348 select { 349 case dest <- blob.SizedRef{br, plainSize}: 350 case <-ctx.Done(): 351 return context.ErrCanceled 352 } 353 n++ 354 if limit != 0 && n >= limit { 355 break 356 } 357 } 358 return iter.Close() 359 } 360 361 // processEncryptedMetaBlob decrypts dat (the data for the br meta blob) and parses 362 // its meta lines, updating the index. 363 // 364 // processEncryptedMetaBlob is not thread-safe. 365 func (s *storage) processEncryptedMetaBlob(br blob.Ref, dat []byte) error { 366 mi := &metaBlobInfo{ 367 br: br, 368 n: len(dat), 369 } 370 log.Printf("processing meta blob %v: %d bytes", br, len(dat)) 371 ivSize := s.block.BlockSize() 372 if len(dat) < ivSize+sha1.Size { 373 return errors.New("data size is smaller than IV + SHA-1") 374 } 375 var ( 376 iv = dat[:ivSize] 377 wantHash = dat[ivSize : ivSize+sha1.Size] 378 enc = dat[ivSize+sha1.Size:] 379 ) 380 plain := bytes.NewBuffer(make([]byte, 0, len(dat))) 381 io.Copy(plain, cipher.StreamReader{ 382 S: cipher.NewCTR(s.block, iv), 383 R: bytes.NewReader(enc), 384 }) 385 s1 := sha1.New() 386 s1.Write(plain.Bytes()) 387 if !bytes.Equal(wantHash, s1.Sum(nil)) { 388 return errors.New("hash of encrypted data doesn't match") 389 } 390 sc := bufio.NewScanner(plain) 391 if !sc.Scan() { 392 return errors.New("No first line") 393 } 394 if sc.Text() != "#camlistore/encmeta=1" { 395 line := sc.Text() 396 if len(line) > 80 { 397 line = line[:80] 398 } 399 return fmt.Errorf("unsupported first line %q", line) 400 } 401 for sc.Scan() { 402 line := sc.Text() 403 slash := strings.Index(line, "/") 404 if slash < 0 { 405 return errors.New("no slash in metaline") 406 } 407 plainBR, meta := line[:slash], line[slash+1:] 408 log.Printf("Adding meta: %q = %q", plainBR, meta) 409 mi.plains = append(mi.plains, blob.ParseOrZero(plainBR)) 410 if err := s.index.Set(plainBR, meta); err != nil { 411 return err 412 } 413 } 414 return sc.Err() 415 } 416 417 func (s *storage) readAllMetaBlobs() error { 418 type metaBlob struct { 419 br blob.Ref 420 dat []byte // encrypted blob 421 err error 422 } 423 metac := make(chan metaBlob, 16) 424 425 const maxInFlight = 50 426 var gate = make(chan bool, maxInFlight) 427 428 var stopEnumerate = make(chan bool) // closed on error 429 enumErrc := make(chan error, 1) 430 go func() { 431 var wg sync.WaitGroup 432 enumErrc <- blobserver.EnumerateAll(context.TODO(), s.meta, func(sb blob.SizedRef) error { 433 select { 434 case <-stopEnumerate: 435 return errors.New("enumeration stopped") 436 default: 437 } 438 439 wg.Add(1) 440 gate <- true 441 go func() { 442 defer wg.Done() 443 defer func() { <-gate }() 444 rc, _, err := s.meta.Fetch(sb.Ref) 445 var all []byte 446 if err == nil { 447 all, err = ioutil.ReadAll(rc) 448 rc.Close() 449 } 450 metac <- metaBlob{sb.Ref, all, err} 451 }() 452 return nil 453 }) 454 wg.Wait() 455 close(metac) 456 }() 457 458 for mi := range metac { 459 err := mi.err 460 if err == nil { 461 err = s.processEncryptedMetaBlob(mi.br, mi.dat) 462 } 463 if err != nil { 464 close(stopEnumerate) 465 go func() { 466 for _ = range metac { 467 } 468 }() 469 // TODO: advertise in this error message a new option or environment variable 470 // to skip a certain or all meta blobs, to allow partial recovery, if some 471 // are corrupt. For now, require all to be correct. 472 return fmt.Errorf("Error with meta blob %v: %v", mi.br, err) 473 } 474 } 475 476 return <-enumErrc 477 } 478 479 func encodeMetaValue(plainSize uint32, iv []byte, encBR blob.Ref, encSize int) string { 480 return fmt.Sprintf("%d/%x/%s/%d", plainSize, iv, encBR, encSize) 481 } 482 483 type metaValue struct { 484 IV []byte 485 EncBlobRef blob.Ref 486 EncSize uint32 487 PlainSize uint32 488 } 489 490 // returns os.ErrNotExist on cache miss 491 func (s *storage) fetchMeta(b blob.Ref) (*metaValue, error) { 492 v, err := s.index.Get(b.String()) 493 if err == sorted.ErrNotFound { 494 err = os.ErrNotExist 495 } 496 if err != nil { 497 return nil, err 498 } 499 return parseMetaValue(v) 500 } 501 502 func parseMetaValuePlainSize(v string) (plainSize uint32, ok bool) { 503 slash := strings.Index(v, "/") 504 if slash < 0 { 505 return 506 } 507 n, err := strconv.ParseUint(v[:slash], 10, 32) 508 if err != nil { 509 return 510 } 511 return uint32(n), true 512 } 513 514 func parseMetaValue(v string) (mv *metaValue, err error) { 515 f := strings.Split(v, "/") 516 if len(f) != 4 { 517 return nil, errors.New("wrong number of fields") 518 } 519 mv = &metaValue{} 520 plainSize, err := strconv.ParseUint(f[0], 10, 32) 521 if err != nil { 522 return nil, fmt.Errorf("bad plaintext size in meta %q", v) 523 } 524 mv.PlainSize = uint32(plainSize) 525 mv.IV, err = hex.DecodeString(f[1]) 526 if err != nil { 527 return nil, fmt.Errorf("bad iv in meta %q", v) 528 } 529 var ok bool 530 mv.EncBlobRef, ok = blob.Parse(f[2]) 531 if !ok { 532 return nil, fmt.Errorf("bad blobref in meta %q", v) 533 } 534 encSize, err := strconv.ParseUint(f[3], 10, 32) 535 if err != nil { 536 return nil, fmt.Errorf("bad encrypted size in meta %q", v) 537 } 538 mv.EncSize = uint32(encSize) 539 return mv, nil 540 } 541 542 func init() { 543 blobserver.RegisterStorageConstructor("encrypt", blobserver.StorageConstructor(newFromConfig)) 544 } 545 546 func newFromConfig(ld blobserver.Loader, config jsonconfig.Obj) (bs blobserver.Storage, err error) { 547 metaConf := config.RequiredObject("metaIndex") 548 sto := &storage{} 549 agreement := config.OptionalString("I_AGREE", "") 550 const wantAgreement = "that encryption support hasn't been peer-reviewed, isn't finished, and its format might change." 551 if agreement != wantAgreement { 552 return nil, errors.New("Use of the 'encrypt' target without the proper I_AGREE value.") 553 } 554 555 key := config.OptionalString("key", "") 556 keyFile := config.OptionalString("keyFile", "") 557 var keyb []byte 558 switch { 559 case key != "": 560 keyb, err = hex.DecodeString(key) 561 if err != nil || len(keyb) != 16 { 562 return nil, fmt.Errorf("The 'key' parameter must be 16 bytes of 32 hex digits. (currently fixed at AES-128)") 563 } 564 case keyFile != "": 565 // TODO: check that keyFile's unix permissions aren't too permissive. 566 keyb, err = ioutil.ReadFile(keyFile) 567 if err != nil { 568 return nil, fmt.Errorf("Reading key file %v: %v", keyFile, err) 569 } 570 } 571 blobStorage := config.RequiredString("blobs") 572 metaStorage := config.RequiredString("meta") 573 if err := config.Validate(); err != nil { 574 return nil, err 575 } 576 577 sto.index, err = sorted.NewKeyValue(metaConf) 578 if err != nil { 579 return 580 } 581 582 sto.blobs, err = ld.GetStorage(blobStorage) 583 if err != nil { 584 return 585 } 586 sto.meta, err = ld.GetStorage(metaStorage) 587 if err != nil { 588 return 589 } 590 591 if keyb == nil { 592 // TODO: add a way to prompt from stdin on start? or keychain support? 593 return nil, errors.New("no encryption key set with 'key' or 'keyFile'") 594 } 595 596 if err := sto.setKey(keyb); err != nil { 597 return nil, err 598 } 599 600 start := time.Now() 601 log.Printf("Reading encryption metadata...") 602 if err := sto.readAllMetaBlobs(); err != nil { 603 return nil, fmt.Errorf("Error scanning metadata on start-up: %v", err) 604 } 605 log.Printf("Read all encryption metadata in %.3f seconds", time.Since(start).Seconds()) 606 607 return sto, nil 608 }