github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/nbs/store.go

// Copyright 2019 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file incorporates work covered by the following copyright and
// permission notice:
//
//     Copyright 2016 Attic Labs, Inc. All rights reserved.
//     Licensed under the Apache License, version 2.0:
//     http://www.apache.org/licenses/LICENSE-2.0

package nbs

import (
	"context"
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"path/filepath"
	"reflect"
	"sort"
	"sync"
	"time"

	"cloud.google.com/go/storage"
	"github.com/dustin/go-humanize"
	"github.com/pkg/errors"
	"golang.org/x/sync/errgroup"

	"github.com/dolthub/dolt/go/libraries/utils/tracing"
	"github.com/dolthub/dolt/go/store/blobstore"
	"github.com/dolthub/dolt/go/store/chunks"
	"github.com/dolthub/dolt/go/store/hash"
)

var ErrFetchFailure = errors.New("fetch failed")
var ErrSpecWithoutChunkSource = errors.New("manifest referenced table file for which there is no chunkSource.")

// The root of a Noms Chunk Store is stored in a 'manifest', along with the
// names of the tables that hold all the chunks in the store. The number of
// chunks in each table is also stored in the manifest.

const (
	// StorageVersion is the version of the on-disk Noms Chunk Store data format.
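	// (Editorial note, an assumption drawn from this file alone: this value is
	// what lets a reader detect an incompatible on-disk generation, so bumping
	// it implies existing stores must be migrated or rewritten.)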
	StorageVersion = "5"

	defaultMemTableSize uint64 = (1 << 20) * 128 // 128MB
	defaultMaxTables           = 256

	defaultIndexCacheSize    = (1 << 20) * 64 // 64MB
	defaultManifestCacheSize = 1 << 23        // 8MB
	preflushChunkCount       = 8

	copyTableFileBufferSize = 128 * 1024 * 1024
)

var (
	cacheOnce           = sync.Once{}
	globalIndexCache    *indexCache
	makeManifestManager func(manifest) manifestManager
	globalFDCache       *fdCache
)

func makeGlobalCaches() {
	globalIndexCache = newIndexCache(defaultIndexCacheSize)
	globalFDCache = newFDCache(defaultMaxTables)

	manifestCache := newManifestCache(defaultManifestCacheSize)
	manifestLocks := newManifestLocks()
	makeManifestManager = func(m manifest) manifestManager { return manifestManager{m, manifestCache, manifestLocks} }
}

type NomsBlockStore struct {
	mm manifestManager
	p  tablePersister
	c  conjoiner

	mu       sync.RWMutex // protects the following state
	mt       *memTable
	tables   tableSet
	upstream manifestContents

	mtSize   uint64
	putCount uint64

	stats *Stats
}

var _ TableFileStore = &NomsBlockStore{}
var _ chunks.ChunkStoreGarbageCollector = &NomsBlockStore{}

type Range struct {
	Offset uint64
	Length uint32
}

func (nbs *NomsBlockStore) GetChunkLocations(hashes hash.HashSet) (map[hash.Hash]map[hash.Hash]Range, error) {
	gr := toGetRecords(hashes)

	ranges := make(map[hash.Hash]map[hash.Hash]Range)
	f := func(css chunkSources) error {
		for _, cs := range css {
			switch tr := cs.(type) {
			case *mmapTableReader:
				offsetRecSlice, _ := tr.findOffsets(gr)
				if len(offsetRecSlice) > 0 {
					y, ok := ranges[hash.Hash(tr.h)]

					if !ok {
						y = make(map[hash.Hash]Range)
					}

					for _, offsetRec := range offsetRecSlice {
						h := hash.Hash(*offsetRec.a)
						y[h] = Range{Offset: offsetRec.offset, Length: offsetRec.length}

						delete(hashes, h)
					}

					if len(offsetRecSlice) > 0 {
						gr = toGetRecords(hashes)
					}

					ranges[hash.Hash(tr.h)] = y
				}
			case *chunkSourceAdapter:
				y, ok := ranges[hash.Hash(tr.h)]

				if !ok {
					y = make(map[hash.Hash]Range)
				}

				tableIndex, err := tr.index()

				if err != nil {
					return err
				}

				var foundHashes []hash.Hash
				for h := range hashes {
					a := addr(h)
					e, ok := tableIndex.Lookup(&a)
					if ok {
						foundHashes = append(foundHashes, h)
						y[h] = Range{Offset: e.Offset(), Length: e.Length()}
					}
				}

				ranges[hash.Hash(tr.h)] = y

				for _, h := range foundHashes {
					delete(hashes, h)
				}

			default:
				panic(reflect.TypeOf(cs))
			}

		}

		return nil
	}

	err := f(nbs.tables.upstream)

	if err != nil {
		return nil, err
	}

	err = f(nbs.tables.novel)

	if err != nil {
		return nil, err
	}

	return ranges, nil
}

func (nbs *NomsBlockStore) UpdateManifest(ctx context.Context, updates map[hash.Hash]uint32) (mi ManifestInfo, err error) {
	nbs.mm.LockForUpdate()
	defer func() {
		unlockErr := nbs.mm.UnlockForUpdate()

		if err == nil {
			err = unlockErr
		}
	}()

	nbs.mu.Lock()
	defer nbs.mu.Unlock()

	var stats Stats
	var ok bool
	var contents manifestContents
	ok, contents, err = nbs.mm.Fetch(ctx, &stats)

	if err != nil {
		return manifestContents{}, err
	} else if !ok {
		contents = manifestContents{vers: nbs.upstream.vers}
	}

	currSpecs := contents.getSpecSet()

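	// NOTE (editorial): |updates| maps a table file name (as a hash.Hash) to the
	// number of chunks it holds. The loop below appends only specs that are not
	// already present in the manifest, so UpdateManifest is purely additive.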
	var addCount int
	for h, count := range updates {
		a := addr(h)

		if _, ok := currSpecs[a]; !ok {
			addCount++
			contents.specs = append(contents.specs, tableSpec{a, count})
		}
	}

	if addCount == 0 {
		return contents, nil
	}

	// ensure we don't drop existing appendices
	if contents.appendix != nil && len(contents.appendix) > 0 {
		contents, err = fromManifestAppendixOptionNewContents(contents, contents.appendix, ManifestAppendixOption_Set)
		if err != nil {
			return manifestContents{}, err
		}
	}

	var updatedContents manifestContents
	updatedContents, err = nbs.mm.Update(ctx, contents.lock, contents, &stats, nil)

	if err != nil {
		return manifestContents{}, err
	}

	newTables, err := nbs.tables.Rebase(ctx, contents.specs, nbs.stats)

	if err != nil {
		return manifestContents{}, err
	}

	nbs.upstream = updatedContents
	oldTables := nbs.tables
	nbs.tables = newTables
	err = oldTables.Close()
	if err != nil {
		return manifestContents{}, err
	}

	return updatedContents, nil
}

func (nbs *NomsBlockStore) UpdateManifestWithAppendix(ctx context.Context, updates map[hash.Hash]uint32, option ManifestAppendixOption) (mi ManifestInfo, err error) {
	nbs.mm.LockForUpdate()
	defer func() {
		unlockErr := nbs.mm.UnlockForUpdate()

		if err == nil {
			err = unlockErr
		}
	}()

	nbs.mu.Lock()
	defer nbs.mu.Unlock()

	var stats Stats
	var ok bool
	var contents manifestContents
	ok, contents, err = nbs.mm.Fetch(ctx, &stats)

	if err != nil {
		return manifestContents{}, err
	} else if !ok {
		contents = manifestContents{vers: nbs.upstream.vers}
	}

	currAppendixSpecs := contents.getAppendixSet()

	appendixSpecs := make([]tableSpec, 0)
	var addCount int
	for h, count := range updates {
		a := addr(h)

		if option == ManifestAppendixOption_Set {
			appendixSpecs = append(appendixSpecs, tableSpec{a, count})
		} else {
			if _, ok := currAppendixSpecs[a]; !ok {
				addCount++
				appendixSpecs = append(appendixSpecs, tableSpec{a, count})
			}
		}
	}

	if addCount == 0 && option != ManifestAppendixOption_Set {
		return contents, nil
	}

	contents, err = fromManifestAppendixOptionNewContents(contents, appendixSpecs, option)
	if err != nil {
		return manifestContents{}, err
	}

	var updatedContents manifestContents
	updatedContents, err = nbs.mm.Update(ctx, contents.lock, contents, &stats, nil)
	if err != nil {
		return manifestContents{}, err
	}

	newTables, err := nbs.tables.Rebase(ctx, contents.specs, nbs.stats)
	if err != nil {
		return manifestContents{}, err
	}

	nbs.upstream = updatedContents
	oldTables := nbs.tables
	nbs.tables = newTables
	err = oldTables.Close()
	if err != nil {
		return manifestContents{}, err
	}
	return updatedContents, nil
}

func fromManifestAppendixOptionNewContents(upstream manifestContents, appendixSpecs []tableSpec, option ManifestAppendixOption) (manifestContents, error) {
	contents, upstreamAppendixSpecs := upstream.removeAppendixSpecs()
	switch option {
	case ManifestAppendixOption_Append:
		// prepend all appendix specs to contents.specs
		specs := append([]tableSpec{}, appendixSpecs...)
		specs = append(specs, upstreamAppendixSpecs...)
		contents.specs = append(specs, contents.specs...)
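		// NOTE (editorial): the slice rebuilt above keeps every appendix spec,
		// existing and new, at the front of contents.specs, matching how
		// updateManifest prepends appendix specs when committing.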

		// append all appendix specs to contents.appendix
		newAppendixSpecs := append([]tableSpec{}, upstreamAppendixSpecs...)
		contents.appendix = append(newAppendixSpecs, appendixSpecs...)

		return contents, nil
	case ManifestAppendixOption_Set:
		if len(appendixSpecs) < 1 {
			return contents, nil
		}

		// prepend new appendix specs to contents.specs,
		// dropping all upstream appendix specs
		specs := append([]tableSpec{}, appendixSpecs...)
		contents.specs = append(specs, contents.specs...)

		// append new appendix specs to contents.appendix
		contents.appendix = append([]tableSpec{}, appendixSpecs...)
		return contents, nil
	default:
		return manifestContents{}, ErrUnsupportedManifestAppendixOption
	}
}

// GetManifestStorageVersion returns the manifest storage version or an error if the operation is not supported
func (nbs *NomsBlockStore) GetManifestStorageVersion(ctx context.Context) (version string, err error) {
	// possibly unnecessary
	nbs.mm.LockForUpdate()
	defer func() {
		err = nbs.mm.UnlockForUpdate()
	}()
	nbs.mu.Lock()
	defer nbs.mu.Unlock()

	return nbs.mm.GetManifestVersion()
}

func NewAWSStoreWithMMapIndex(ctx context.Context, nbfVerStr string, table, ns, bucket string, s3 s3svc, ddb ddbsvc, memTableSize uint64) (*NomsBlockStore, error) {
	cacheOnce.Do(makeGlobalCaches)
	readRateLimiter := make(chan struct{}, 32)
	p := &awsTablePersister{
		s3,
		bucket,
		readRateLimiter,
		nil,
		&ddbTableStore{ddb, table, readRateLimiter, nil},
		awsLimits{defaultS3PartSize, minS3PartSize, maxS3PartSize, maxDynamoItemSize, maxDynamoChunks},
		globalIndexCache,
		ns,
		func(bs []byte) (tableIndex, error) {
			ohi, err := parseTableIndex(bs)
			if err != nil {
				return nil, err
			}
			return newMmapTableIndex(ohi, nil)
		},
	}
	mm := makeManifestManager(newDynamoManifest(table, ns, ddb))
	return newNomsBlockStore(ctx, nbfVerStr, mm, p, inlineConjoiner{defaultMaxTables}, memTableSize)
}

func NewAWSStore(ctx context.Context, nbfVerStr string, table, ns, bucket string, s3 s3svc, ddb ddbsvc, memTableSize uint64) (*NomsBlockStore, error) {
	cacheOnce.Do(makeGlobalCaches)
	readRateLimiter := make(chan struct{}, 32)
	p := &awsTablePersister{
		s3,
		bucket,
		readRateLimiter,
		nil,
		&ddbTableStore{ddb, table, readRateLimiter, nil},
		awsLimits{defaultS3PartSize, minS3PartSize, maxS3PartSize, maxDynamoItemSize, maxDynamoChunks},
		globalIndexCache,
		ns,
		func(bs []byte) (tableIndex, error) {
			return parseTableIndex(bs)
		},
	}
	mm := makeManifestManager(newDynamoManifest(table, ns, ddb))
	return newNomsBlockStore(ctx, nbfVerStr, mm, p, inlineConjoiner{defaultMaxTables}, memTableSize)
}

// NewGCSStore returns an nbs implementation backed by a GCSBlobstore
func NewGCSStore(ctx context.Context, nbfVerStr string, bucketName, path string, gcs *storage.Client, memTableSize uint64) (*NomsBlockStore, error) {
	cacheOnce.Do(makeGlobalCaches)

	bs := blobstore.NewGCSBlobstore(gcs, bucketName, path)
	return NewBSStore(ctx, nbfVerStr, bs, memTableSize)
}

// NewBSStore returns an nbs implementation backed by a Blobstore
func NewBSStore(ctx context.Context, nbfVerStr string, bs blobstore.Blobstore, memTableSize uint64) (*NomsBlockStore, error) {
	cacheOnce.Do(makeGlobalCaches)

	mm := makeManifestManager(blobstoreManifest{"manifest", bs})

	p := &blobstorePersister{bs, s3BlockSize, globalIndexCache}
	return newNomsBlockStore(ctx, nbfVerStr, mm, p, inlineConjoiner{defaultMaxTables}, memTableSize)
}

func NewLocalStore(ctx context.Context, nbfVerStr string, dir string, memTableSize uint64) (*NomsBlockStore, error) {
	return newLocalStore(ctx, nbfVerStr, dir, memTableSize, defaultMaxTables)
}

func newLocalStore(ctx context.Context, nbfVerStr string, dir string, memTableSize uint64, maxTables int) (*NomsBlockStore, error) {
	cacheOnce.Do(makeGlobalCaches)
	err := checkDir(dir)

	if err != nil {
		return nil, err
	}

	m, err := getFileManifest(ctx, dir)

	if err != nil {
		return nil, err
	}

	mm := makeManifestManager(m)
	p := newFSTablePersister(dir, globalFDCache, globalIndexCache)
	nbs, err := newNomsBlockStore(ctx, nbfVerStr, mm, p, inlineConjoiner{maxTables}, memTableSize)

	if err != nil {
		return nil, err
	}

	return nbs, nil
}

func checkDir(dir string) error {
	stat, err := os.Stat(dir)
	if err != nil {
		return err
	}
	if !stat.IsDir() {
		return fmt.Errorf("path is not a directory: %s", dir)
	}
	return nil
}

func newNomsBlockStore(ctx context.Context, nbfVerStr string, mm manifestManager, p tablePersister, c conjoiner, memTableSize uint64) (*NomsBlockStore, error) {
	if memTableSize == 0 {
		memTableSize = defaultMemTableSize
	}

	nbs := &NomsBlockStore{
		mm:       mm,
		p:        p,
		c:        c,
		tables:   newTableSet(p),
		upstream: manifestContents{vers: nbfVerStr},
		mtSize:   memTableSize,
		stats:    NewStats(),
	}

	t1 := time.Now()
	defer nbs.stats.OpenLatency.SampleTimeSince(t1)

	exists, contents, err := nbs.mm.Fetch(ctx, nbs.stats)

	if err != nil {
		return nil, err
	}

	if exists {
		newTables, err := nbs.tables.Rebase(ctx, contents.specs, nbs.stats)

		if err != nil {
			return nil, err
		}

		nbs.upstream = contents
		oldTables := nbs.tables
		nbs.tables = newTables
		err = oldTables.Close()
		if err != nil {
			return nil, err
		}
	}

	return nbs, nil
}

// WithoutConjoiner returns a new *NomsBlockStore instance that will not
// conjoin table files during manifest updates. Used in some server-side
// contexts when things like table file maintenance are done out-of-process. Not
// safe for use outside of NomsBlockStore construction.
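//
// A minimal usage sketch (illustrative only; error handling elided):
//
//	store, _ := NewLocalStore(ctx, nbfVerStr, dir, defaultMemTableSize)
//	noConjoin := store.WithoutConjoiner()
//	// commits made through noConjoin never trigger table file conjoins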
func (nbs *NomsBlockStore) WithoutConjoiner() *NomsBlockStore {
	return &NomsBlockStore{
		mm:       nbs.mm,
		p:        nbs.p,
		c:        noopConjoiner{},
		mu:       sync.RWMutex{},
		mt:       nbs.mt,
		tables:   nbs.tables,
		upstream: nbs.upstream,
		mtSize:   nbs.mtSize,
		putCount: nbs.putCount,
		stats:    nbs.stats,
	}
}

func (nbs *NomsBlockStore) Put(ctx context.Context, c chunks.Chunk) error {
	t1 := time.Now()
	a := addr(c.Hash())
	success := nbs.addChunk(ctx, a, c.Data())

	if !success {
		return errors.New("failed to add chunk")
	}

	nbs.putCount++

	nbs.stats.PutLatency.SampleTimeSince(t1)

	return nil
}

func (nbs *NomsBlockStore) addChunk(ctx context.Context, h addr, data []byte) bool {
	nbs.mu.Lock()
	defer nbs.mu.Unlock()
	if nbs.mt == nil {
		nbs.mt = newMemTable(nbs.mtSize)
	}
	if !nbs.mt.addChunk(h, data) {
		nbs.tables = nbs.tables.Prepend(ctx, nbs.mt, nbs.stats)
		nbs.mt = newMemTable(nbs.mtSize)
		return nbs.mt.addChunk(h, data)
	}
	return true
}

func (nbs *NomsBlockStore) Get(ctx context.Context, h hash.Hash) (chunks.Chunk, error) {
	span, ctx := tracing.StartSpan(ctx, "nbs.Get")
	defer func() {
		span.Finish()
	}()

	t1 := time.Now()
	defer func() {
		nbs.stats.GetLatency.SampleTimeSince(t1)
		nbs.stats.ChunksPerGet.Sample(1)
	}()

	a := addr(h)
	data, tables, err := func() ([]byte, chunkReader, error) {
		var data []byte
		nbs.mu.RLock()
		defer nbs.mu.RUnlock()
		if nbs.mt != nil {
			var err error
			data, err = nbs.mt.get(ctx, a, nbs.stats)

			if err != nil {
				return nil, nil, err
			}
		}
		return data, nbs.tables, nil
	}()

	if err != nil {
		return chunks.EmptyChunk, err
	}

	if data != nil {
		return chunks.NewChunkWithHash(h, data), nil
	}

	data, err = tables.get(ctx, a, nbs.stats)

	if err != nil {
		return chunks.EmptyChunk, err
	}

	if data != nil {
		return chunks.NewChunkWithHash(h, data), nil
	}

	return chunks.EmptyChunk, nil
}

func (nbs *NomsBlockStore) GetMany(ctx context.Context, hashes hash.HashSet, found func(*chunks.Chunk)) error {
	span, ctx := tracing.StartSpan(ctx, "nbs.GetMany")
	span.LogKV("num_hashes", len(hashes))
	defer func() {
		span.Finish()
	}()
	return nbs.getManyWithFunc(ctx, hashes, func(ctx context.Context, cr chunkReader, eg *errgroup.Group, reqs []getRecord, stats *Stats) (bool, error) {
		return cr.getMany(ctx, eg, reqs, found, nbs.stats)
	})
}

func (nbs *NomsBlockStore) GetManyCompressed(ctx context.Context, hashes hash.HashSet, found func(CompressedChunk)) error {
	span, ctx := tracing.StartSpan(ctx, "nbs.GetManyCompressed")
	span.LogKV("num_hashes", len(hashes))
	defer func() {
		span.Finish()
	}()
	return nbs.getManyWithFunc(ctx, hashes, func(ctx context.Context, cr chunkReader, eg *errgroup.Group, reqs []getRecord, stats *Stats) (bool, error) {
		return cr.getManyCompressed(ctx, eg, reqs, found, nbs.stats)
	})
}

func (nbs *NomsBlockStore) getManyWithFunc(
	ctx context.Context,
	hashes hash.HashSet,
	getManyFunc func(ctx context.Context, cr chunkReader, eg *errgroup.Group, reqs []getRecord, stats *Stats) (bool, error),
) error {
	t1 := time.Now()
	reqs := toGetRecords(hashes)

	defer func() {
		if len(hashes) > 0 {
			nbs.stats.GetLatency.SampleTimeSince(t1)
			nbs.stats.ChunksPerGet.Sample(uint64(len(reqs)))
		}
	}()

	eg, ctx := errgroup.WithContext(ctx)

	tables, remaining, err := func() (tables chunkReader, remaining bool, err error) {
		nbs.mu.RLock()
		defer nbs.mu.RUnlock()
		tables = nbs.tables
		remaining = true
		if nbs.mt != nil {
			remaining, err = getManyFunc(ctx, nbs.mt, eg, reqs, nbs.stats)
		}
		return
	}()
	if err != nil {
		return err
	}

	if remaining {
		_, err = getManyFunc(ctx, tables, eg, reqs, nbs.stats)
	}

	if err != nil {
		eg.Wait()
		return err
	}
	return eg.Wait()
}

func toGetRecords(hashes hash.HashSet) []getRecord {
	reqs := make([]getRecord, len(hashes))
	idx := 0
	for h := range hashes {
		a := addr(h)
		reqs[idx] = getRecord{
			a:      &a,
			prefix: a.Prefix(),
		}
		idx++
	}

	sort.Sort(getRecordByPrefix(reqs))
	return reqs
}

func (nbs *NomsBlockStore) CalcReads(hashes hash.HashSet, blockSize uint64) (reads int, split bool, err error) {
	reqs := toGetRecords(hashes)
	tables := func() (tables tableSet) {
		nbs.mu.RLock()
		defer nbs.mu.RUnlock()
		tables = nbs.tables

		return
	}()

	reads, split, remaining, err := tables.calcReads(reqs, blockSize)

	if err != nil {
		return 0, false, err
	}

	if remaining {
		return 0, false, errors.New("failed to find all chunks")
	}

	return
}

func (nbs *NomsBlockStore) Count() (uint32, error) {
	count, tables, err := func() (count uint32, tables chunkReader, err error) {
		nbs.mu.RLock()
		defer nbs.mu.RUnlock()
		if nbs.mt != nil {
			count, err = nbs.mt.count()
		}

		if err != nil {
			return 0, nil, err
		}

		return count, nbs.tables, nil
	}()

	if err != nil {
		return 0, err
	}

	tablesCount, err := tables.count()

	if err != nil {
		return 0, err
	}

	return count + tablesCount, nil
}

func (nbs *NomsBlockStore) Has(ctx context.Context, h hash.Hash) (bool, error) {
	t1 := time.Now()
	defer func() {
		nbs.stats.HasLatency.SampleTimeSince(t1)
		nbs.stats.AddressesPerHas.Sample(1)
	}()

	a := addr(h)
	has, tables, err := func() (bool, chunkReader, error) {
		nbs.mu.RLock()
		defer nbs.mu.RUnlock()

		if nbs.mt != nil {
			has, err := nbs.mt.has(a)

			if err != nil {
				return false, nil, err
			}

			return has, nbs.tables, nil
		}

		return false, nbs.tables, nil
	}()

	if err != nil {
		return false, err
	}

	if !has {
		has, err = tables.has(a)

		if err != nil {
			return false, err
		}
	}

	return has, nil
}

func (nbs *NomsBlockStore) HasMany(ctx context.Context, hashes hash.HashSet) (hash.HashSet, error) {
	t1 := time.Now()

	reqs := toHasRecords(hashes)

	tables, remaining, err := func() (tables chunkReader, remaining bool, err error) {
		nbs.mu.RLock()
		defer nbs.mu.RUnlock()
		tables = nbs.tables

		remaining = true
		if nbs.mt != nil {
			remaining, err = nbs.mt.hasMany(reqs)

			if err != nil {
				return nil, false, err
			}
		}

		return tables, remaining, nil
	}()

	if err != nil {
		return nil, err
	}

	if remaining {
		_, err := tables.hasMany(reqs)

		if err != nil {
			return nil, err
		}
	}

	if len(hashes) > 0 {
		nbs.stats.HasLatency.SampleTimeSince(t1)
		nbs.stats.AddressesPerHas.SampleLen(len(reqs))
	}
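
	// NOTE (editorial): each hasRecord was marked in place by the hasMany calls
	// above; the loop below collects the addresses that were never found and
	// returns them as the absent set.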
	absent := hash.HashSet{}
	for _, r := range reqs {
		if !r.has {
			absent.Insert(hash.New(r.a[:]))
		}
	}
	return absent, nil
}

func toHasRecords(hashes hash.HashSet) []hasRecord {
	reqs := make([]hasRecord, len(hashes))
	idx := 0
	for h := range hashes {
		a := addr(h)
		reqs[idx] = hasRecord{
			a:      &a,
			prefix: a.Prefix(),
			order:  idx,
		}
		idx++
	}

	sort.Sort(hasRecordByPrefix(reqs))
	return reqs
}

func (nbs *NomsBlockStore) Rebase(ctx context.Context) error {
	nbs.mu.Lock()
	defer nbs.mu.Unlock()
	exists, contents, err := nbs.mm.Fetch(ctx, nbs.stats)

	if err != nil {
		return err
	}

	if exists {
		newTables, err := nbs.tables.Rebase(ctx, contents.specs, nbs.stats)

		if err != nil {
			return err
		}

		nbs.upstream = contents
		oldTables := nbs.tables
		nbs.tables = newTables
		err = oldTables.Close()
		if err != nil {
			return err
		}
	}

	return nil
}

func (nbs *NomsBlockStore) Root(ctx context.Context) (hash.Hash, error) {
	nbs.mu.RLock()
	defer nbs.mu.RUnlock()
	return nbs.upstream.root, nil
}

func (nbs *NomsBlockStore) Commit(ctx context.Context, current, last hash.Hash) (success bool, err error) {
	t1 := time.Now()
	defer nbs.stats.CommitLatency.SampleTimeSince(t1)

	anyPossiblyNovelChunks := func() bool {
		nbs.mu.Lock()
		defer nbs.mu.Unlock()
		return nbs.mt != nil || nbs.tables.Novel() > 0
	}

	if !anyPossiblyNovelChunks() && current == last {
		err := nbs.Rebase(ctx)

		if err != nil {
			return false, err
		}

		return true, nil
	}

	err = func() error {
		// This is unfortunate. We want to serialize commits to the same store
		// so that we avoid writing a bunch of unreachable small tables which result
		// from optimistic lock failures. However, this means that the time to
		// write tables is included in "commit" time and if all commits are
		// serialized, it means a lot more waiting.
		// "non-trivial" tables are persisted here, outside of the commit-lock.
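		// ("non-trivial" here means the pending memTable holds more than
		// preflushChunkCount chunks; see the count check below.)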
		// all other tables are persisted in updateManifest()
		nbs.mu.Lock()
		defer nbs.mu.Unlock()

		if nbs.mt != nil {
			cnt, err := nbs.mt.count()

			if err != nil {
				return err
			}

			if cnt > preflushChunkCount {
				nbs.tables = nbs.tables.Prepend(ctx, nbs.mt, nbs.stats)
				nbs.mt = nil
			}
		}

		return nil
	}()

	if err != nil {
		return false, err
	}

	nbs.mm.LockForUpdate()
	defer func() {
		unlockErr := nbs.mm.UnlockForUpdate()

		if err == nil {
			err = unlockErr
		}
	}()

	nbs.mu.Lock()
	defer nbs.mu.Unlock()
	for {
		if err := nbs.updateManifest(ctx, current, last); err == nil {
			return true, nil
		} else if err == errOptimisticLockFailedRoot || err == errLastRootMismatch {
			return false, nil
		} else if err != errOptimisticLockFailedTables {
			return false, err
		}

		// I guess this thing infinitely retries without backoff in the case of errOptimisticLockFailedTables
	}
}

var (
	errLastRootMismatch           = fmt.Errorf("last does not match nbs.Root()")
	errOptimisticLockFailedRoot   = fmt.Errorf("root moved")
	errOptimisticLockFailedTables = fmt.Errorf("tables changed")
)

// callers must acquire lock |nbs.mu|
func (nbs *NomsBlockStore) updateManifest(ctx context.Context, current, last hash.Hash) error {
	if nbs.upstream.root != last {
		return errLastRootMismatch
	}

	handleOptimisticLockFailure := func(upstream manifestContents) error {
		newTables, err := nbs.tables.Rebase(ctx, upstream.specs, nbs.stats)
		if err != nil {
			return err
		}

		nbs.upstream = upstream
		oldTables := nbs.tables
		nbs.tables = newTables
		err = oldTables.Close()

		if last != upstream.root {
			return errOptimisticLockFailedRoot
		}

		if err != nil {
			return err
		}

		return errOptimisticLockFailedTables
	}

	if cached, doomed := nbs.mm.updateWillFail(nbs.upstream.lock); doomed {
		// Pre-emptive optimistic lock failure. Someone else in-process moved to the root, the set of tables, or both out from under us.
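		// (Editorial assumption: updateWillFail appears to consult the manifest
		// manager's cached contents, letting us rebase against them here without
		// another round-trip to the manifest.)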
		return handleOptimisticLockFailure(cached)
	}

	if nbs.mt != nil {
		cnt, err := nbs.mt.count()

		if err != nil {
			return err
		}

		if cnt > 0 {
			nbs.tables = nbs.tables.Prepend(ctx, nbs.mt, nbs.stats)
			nbs.mt = nil
		}
	}

	if nbs.c.ConjoinRequired(nbs.tables) {
		var err error

		newUpstream, err := nbs.c.Conjoin(ctx, nbs.upstream, nbs.mm, nbs.p, nbs.stats)

		if err != nil {
			return err
		}

		newTables, err := nbs.tables.Rebase(ctx, newUpstream.specs, nbs.stats)

		if err != nil {
			return err
		}

		nbs.upstream = newUpstream
		oldTables := nbs.tables
		nbs.tables = newTables
		err = oldTables.Close()
		if err != nil {
			return err
		}

		return errOptimisticLockFailedTables
	}

	specs, err := nbs.tables.ToSpecs()
	if err != nil {
		return err
	}

	// ensure we don't drop appendices on commit
	var appendixSpecs []tableSpec
	if nbs.upstream.appendix != nil && len(nbs.upstream.appendix) > 0 {
		appendixSet := nbs.upstream.getAppendixSet()

		filtered := make([]tableSpec, 0, len(specs))
		for _, s := range specs {
			if _, present := appendixSet[s.name]; !present {
				filtered = append(filtered, s)
			}
		}

		_, appendixSpecs = nbs.upstream.removeAppendixSpecs()
		prepended := append([]tableSpec{}, appendixSpecs...)
		specs = append(prepended, filtered...)
	}

	newContents := manifestContents{
		vers:     nbs.upstream.vers,
		root:     current,
		lock:     generateLockHash(current, specs),
		gcGen:    nbs.upstream.gcGen,
		specs:    specs,
		appendix: appendixSpecs,
	}

	upstream, err := nbs.mm.Update(ctx, nbs.upstream.lock, newContents, nbs.stats, nil)
	if err != nil {
		return err
	}

	if newContents.lock != upstream.lock {
		// Optimistic lock failure. Someone else moved to the root, the set of tables, or both out from under us.
		return handleOptimisticLockFailure(upstream)
	}

	newTables, err := nbs.tables.Flatten()

	if err != nil {
		return err
	}

	nbs.upstream = newContents
	nbs.tables = newTables

	return nil
}

func (nbs *NomsBlockStore) Version() string {
	return nbs.upstream.vers
}

func (nbs *NomsBlockStore) Close() error {
	return nbs.tables.Close()
}

func (nbs *NomsBlockStore) Stats() interface{} {
	return nbs.stats.Clone()
}

func (nbs *NomsBlockStore) StatsSummary() string {
	nbs.mu.Lock()
	defer nbs.mu.Unlock()
	cnt, _ := nbs.tables.count()
	physLen, _ := nbs.tables.physicalLen()
	return fmt.Sprintf("Root: %s; Chunk Count %d; Physical Bytes %s", nbs.upstream.root, cnt, humanize.Bytes(physLen))
}

// tableFile is our implementation of TableFile.
type tableFile struct {
	info TableSpecInfo
	open func(ctx context.Context) (io.ReadCloser, error)
}

// FileID gets the id of the file
func (tf tableFile) FileID() string {
	return tf.info.GetName()
}

// NumChunks returns the number of chunks in a table file
func (tf tableFile) NumChunks() int {
	return int(tf.info.GetChunkCount())
}

// Open returns an io.ReadCloser which can be used to read the bytes of a table file.
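// The reader produced by newTableFile (below) is wrapped in ioutil.NopCloser,
// so Close is currently a no-op; callers should still close it to satisfy the
// io.ReadCloser contract.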
func (tf tableFile) Open(ctx context.Context) (io.ReadCloser, error) {
	return tf.open(ctx)
}

// Sources retrieves the current root hash, a list of all table files (which may include appendix tablefiles),
// and a second list of only the appendix table files
func (nbs *NomsBlockStore) Sources(ctx context.Context) (hash.Hash, []TableFile, []TableFile, error) {
	nbs.mu.Lock()
	defer nbs.mu.Unlock()

	stats := &Stats{}
	exists, contents, err := nbs.mm.m.ParseIfExists(ctx, stats, nil)

	if err != nil {
		return hash.Hash{}, nil, nil, err
	}

	if !exists {
		return hash.Hash{}, nil, nil, nil
	}

	css, err := nbs.chunkSourcesByAddr()
	if err != nil {
		return hash.Hash{}, nil, nil, err
	}

	appendixTableFiles, err := getTableFiles(css, contents, contents.NumAppendixSpecs(), func(mc manifestContents, idx int) tableSpec {
		return mc.getAppendixSpec(idx)
	})
	if err != nil {
		return hash.Hash{}, nil, nil, err
	}

	allTableFiles, err := getTableFiles(css, contents, contents.NumTableSpecs(), func(mc manifestContents, idx int) tableSpec {
		return mc.getSpec(idx)
	})
	if err != nil {
		return hash.Hash{}, nil, nil, err
	}

	return contents.GetRoot(), allTableFiles, appendixTableFiles, nil
}

func getTableFiles(css map[addr]chunkSource, contents manifestContents, numSpecs int, specFunc func(mc manifestContents, idx int) tableSpec) ([]TableFile, error) {
	tableFiles := make([]TableFile, 0)
	if numSpecs == 0 {
		return tableFiles, nil
	}
	for i := 0; i < numSpecs; i++ {
		info := specFunc(contents, i)
		cs, ok := css[info.name]
		if !ok {
			return nil, ErrSpecWithoutChunkSource
		}
		tableFiles = append(tableFiles, newTableFile(cs, info))
	}
	return tableFiles, nil
}

func newTableFile(cs chunkSource, info tableSpec) tableFile {
	return tableFile{
		info: info,
		open: func(ctx context.Context) (io.ReadCloser, error) {
			r, err := cs.reader(ctx)
			if err != nil {
				return nil, err
			}

			return ioutil.NopCloser(r), nil
		},
	}
}

func (nbs *NomsBlockStore) Size(ctx context.Context) (uint64, error) {
	nbs.mu.Lock()
	defer nbs.mu.Unlock()

	stats := &Stats{}
	exists, contents, err := nbs.mm.m.ParseIfExists(ctx, stats, nil)

	if err != nil {
		return uint64(0), err
	}

	if !exists {
		return uint64(0), nil
	}

	css, err := nbs.chunkSourcesByAddr()
	if err != nil {
		return uint64(0), err
	}

	numSpecs := contents.NumTableSpecs()

	size := uint64(0)
	for i := 0; i < numSpecs; i++ {
		info := contents.getSpec(i)
		cs, ok := css[info.name]
		if !ok {
			return uint64(0), errors.New("manifest referenced table file for which there is no chunkSource.")
		}
		ti, err := cs.index()
		if err != nil {
			return uint64(0), fmt.Errorf("error getting table file index for chunkSource. %w", err)
		}
		size += ti.TableFileSize()
	}
	return size, nil
}

func (nbs *NomsBlockStore) chunkSourcesByAddr() (map[addr]chunkSource, error) {
	css := make(map[addr]chunkSource, len(nbs.tables.upstream)+len(nbs.tables.novel))
	for _, cs := range nbs.tables.upstream {
		a, err := cs.hash()
		if err != nil {
			return nil, err
		}
		css[a] = cs
	}
	for _, cs := range nbs.tables.novel {
		a, err := cs.hash()
		if err != nil {
			return nil, err
		}
		css[a] = cs
	}
	return css, nil

}

func (nbs *NomsBlockStore) SupportedOperations() TableFileStoreOps {
	_, ok := nbs.p.(*fsTablePersister)
	return TableFileStoreOps{
		CanRead:  true,
		CanWrite: ok,
		CanPrune: ok,
		CanGC:    ok,
	}
}

// WriteTableFile will read a table file from the provided reader and write it to the TableFileStore
func (nbs *NomsBlockStore) WriteTableFile(ctx context.Context, fileId string, numChunks int, rd io.Reader, contentLength uint64, contentHash []byte) error {
	fsPersister, ok := nbs.p.(*fsTablePersister)

	if !ok {
		return errors.New("Not implemented")
	}

	path := filepath.Join(fsPersister.dir, fileId)

	err := func() (err error) {
		var f *os.File
		f, err = os.OpenFile(path, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, os.ModePerm)

		if err != nil {
			return err
		}

		defer func() {
			closeErr := f.Close()

			if err == nil {
				err = closeErr
			}
		}()

		return writeTo(f, rd, copyTableFileBufferSize)
	}()

	if err != nil {
		return err
	}

	fileIdHash, ok := hash.MaybeParse(fileId)

	if !ok {
		return errors.New("invalid base32 encoded hash: " + fileId)
	}

	_, err = nbs.UpdateManifest(ctx, map[hash.Hash]uint32{fileIdHash: uint32(numChunks)})

	return err
}

func writeTo(wr io.Writer, rd io.Reader, bufferSize uint32) error {
	buf := make([]byte, bufferSize)

	for {
		// Read can return n > 0 bytes together with io.EOF
		n, err := rd.Read(buf)

		if err != nil && err != io.EOF {
			return err
		}

		pos := 0
		for pos < n {
			written, wrErr := wr.Write(buf[pos:n])

			if wrErr != nil {
				return wrErr
			}

			pos += written
		}

		if err == io.EOF {
			break
		}
	}

	return nil
}

// PruneTableFiles deletes old table files that are no longer referenced in the manifest.
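//
// A minimal usage sketch (illustrative only):
//
//	if store.SupportedOperations().CanPrune {
//		if err := store.PruneTableFiles(ctx); err != nil {
//			// handle the error
//		}
//	}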
func (nbs *NomsBlockStore) PruneTableFiles(ctx context.Context) (err error) {
	nbs.mu.Lock()
	defer nbs.mu.Unlock()

	nbs.mm.LockForUpdate()
	defer func() {
		unlockErr := nbs.mm.UnlockForUpdate()

		if err == nil {
			err = unlockErr
		}
	}()

	for {
		// flush all tables and update manifest
		err = nbs.updateManifest(ctx, nbs.upstream.root, nbs.upstream.root)

		if err == nil {
			break
		} else if err == errOptimisticLockFailedTables {
			continue
		} else {
			return err
		}

		// Same behavior as Commit
		// infinitely retries without backoff in the case of errOptimisticLockFailedTables
	}

	ok, contents, err := nbs.mm.Fetch(ctx, &Stats{})
	if err != nil {
		return err
	}
	if !ok {
		return nil // no manifest exists
	}

	return nbs.p.PruneTableFiles(ctx, contents)
}

func (nbs *NomsBlockStore) MarkAndSweepChunks(ctx context.Context, last hash.Hash, keepChunks <-chan []hash.Hash) error {
	ops := nbs.SupportedOperations()
	if !ops.CanGC || !ops.CanPrune {
		return chunks.ErrUnsupportedOperation
	}

	if nbs.upstream.root != last {
		return errLastRootMismatch
	}

	specs, err := nbs.copyMarkedChunks(ctx, keepChunks)
	if err != nil {
		return err
	}
	if ctx.Err() != nil {
		return ctx.Err()
	}

	err = nbs.swapTables(ctx, specs)
	if err != nil {
		return err
	}
	if ctx.Err() != nil {
		return ctx.Err()
	}

	ok, contents, err := nbs.mm.Fetch(ctx, &Stats{})
	if err != nil {
		return err
	}
	if !ok {
		panic("no manifest")
	}
	if ctx.Err() != nil {
		return ctx.Err()
	}

	return nbs.p.PruneTableFiles(ctx, contents)
}

func (nbs *NomsBlockStore) copyMarkedChunks(ctx context.Context, keepChunks <-chan []hash.Hash) ([]tableSpec, error) {
	gcc, err := newGarbageCollectionCopier()
	if err != nil {
		return nil, err
	}

LOOP:
	for {
		select {
		case hs, ok := <-keepChunks:
			if !ok {
				break LOOP
			}
			var addErr error
			mu := new(sync.Mutex)
			hashset := hash.NewHashSet(hs...)
			err := nbs.GetManyCompressed(ctx, hashset, func(c CompressedChunk) {
				mu.Lock()
				defer mu.Unlock()
				if addErr != nil {
					return
				}
				addErr = gcc.addChunk(ctx, c)
			})
			if err != nil {
				return nil, err
			}
			if addErr != nil {
				return nil, addErr
			}
		case <-ctx.Done():
			return nil, ctx.Err()
		}
	}

	nomsDir := nbs.p.(*fsTablePersister).dir

	return gcc.copyTablesToDir(ctx, nomsDir)
}

// todo: what's the optimal table size to copy to?
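// gcTableSize picks a target size for tables written during GC: the larger of
// the average size of the store's current tables and its memTable size (an
// editorial summary of the heuristic below; the todo above still stands).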
func (nbs *NomsBlockStore) gcTableSize() (uint64, error) {
	total, err := nbs.tables.physicalLen()

	if err != nil {
		return 0, err
	}

	avgTableSize := total / uint64(nbs.tables.Upstream()+nbs.tables.Novel()+1)

	// max(avgTableSize, nbs.mtSize)
	if avgTableSize > nbs.mtSize {
		return avgTableSize, nil
	}
	return nbs.mtSize, nil
}

func (nbs *NomsBlockStore) swapTables(ctx context.Context, specs []tableSpec) error {
	newLock := generateLockHash(nbs.upstream.root, specs)
	newContents := manifestContents{
		vers:  nbs.upstream.vers,
		root:  nbs.upstream.root,
		lock:  newLock,
		gcGen: newLock,
		specs: specs,
	}

	var err error
	nbs.mm.LockForUpdate()
	defer func() {
		unlockErr := nbs.mm.UnlockForUpdate()

		if err == nil {
			err = unlockErr
		}
	}()

	upstream, err := nbs.mm.UpdateGCGen(ctx, nbs.upstream.lock, newContents, nbs.stats, nil)
	if err != nil {
		return err
	}

	// clear memTable
	nbs.mt = newMemTable(nbs.mtSize)

	// clear nbs.tables.novel
	nbs.tables, err = nbs.tables.Flatten()

	if err != nil {
		return err
	}

	// replace nbs.tables.upstream with gc compacted tables
	nbs.upstream = upstream
	nbs.tables, err = nbs.tables.Rebase(ctx, specs, nbs.stats)

	if err != nil {
		return err
	}

	return nil
}

// SetRootChunk changes the root chunk hash from the previous value to the new root.
func (nbs *NomsBlockStore) SetRootChunk(ctx context.Context, root, previous hash.Hash) error {
	nbs.mu.Lock()
	defer nbs.mu.Unlock()
	for {
		err := nbs.updateManifest(ctx, root, previous)

		if err == nil {
			return nil
		} else if err == errOptimisticLockFailedTables {
			continue
		} else {
			return err
		}

		// Same behavior as Commit
		// I guess this thing infinitely retries without backoff in the case of errOptimisticLockFailedTables
	}
}