github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/objstorage/objstorageprovider/remote.go (about)

     1  // Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package objstorageprovider
     6  
     7  import (
     8  	"context"
     9  	"fmt"
    10  	"runtime"
    11  	"sync"
    12  	"sync/atomic"
    13  
    14  	"github.com/cockroachdb/errors"
    15  	"github.com/cockroachdb/pebble/internal/base"
    16  	"github.com/cockroachdb/pebble/internal/invariants"
    17  	"github.com/cockroachdb/pebble/objstorage"
    18  	"github.com/cockroachdb/pebble/objstorage/objstorageprovider/remoteobjcat"
    19  	"github.com/cockroachdb/pebble/objstorage/objstorageprovider/sharedcache"
    20  	"github.com/cockroachdb/pebble/objstorage/remote"
    21  	"github.com/cockroachdb/redact"
    22  )
    23  
// remoteSubsystem contains the provider fields related to remote storage.
// All fields remain unset if remote storage is not configured.
type remoteSubsystem struct {
	// catalog is the remote object catalog. It is opened by remoteInit and
	// closed (and reset to nil) by sharedClose.
	catalog *remoteobjcat.Catalog
	// catalogSyncMutex is used to correctly serialize two sharedSync operations.
	// It must be acquired before the provider mutex.
	catalogSyncMutex sync.Mutex

	// cache is the shared cache. remoteInit only opens it when a non-zero
	// cache size is configured, so it can be nil even when remote storage is
	// in use.
	cache *sharedcache.Cache

	// shared contains the fields relevant to shared objects, i.e. objects that
	// are created by Pebble and potentially shared between Pebble instances.
	shared struct {
		// initialized guards access to the creatorID field. It is set to true
		// by initShared only after creatorID has been written; readers check
		// it before reading creatorID.
		initialized atomic.Bool
		// creatorID is the creator ID installed by initShared.
		creatorID objstorage.CreatorID
		// initOnce makes sure the body of initShared runs at most once.
		initOnce sync.Once

		// checkRefsOnOpen controls whether we check the ref marker file when opening
		// an object. Normally this is true when invariants are enabled (but the provider
		// test tweaks this field).
		checkRefsOnOpen bool
	}
}
    48  
    49  // remoteInit initializes the remote object subsystem (if configured) and finds
    50  // any remote objects.
    51  func (p *provider) remoteInit() error {
    52  	if p.st.Remote.StorageFactory == nil {
    53  		return nil
    54  	}
    55  	catalog, contents, err := remoteobjcat.Open(p.st.FS, p.st.FSDirName)
    56  	if err != nil {
    57  		return errors.Wrapf(err, "pebble: could not open remote object catalog")
    58  	}
    59  	p.remote.catalog = catalog
    60  	p.remote.shared.checkRefsOnOpen = invariants.Enabled
    61  
    62  	// The creator ID may or may not be initialized yet.
    63  	if contents.CreatorID.IsSet() {
    64  		p.remote.initShared(contents.CreatorID)
    65  		p.st.Logger.Infof("remote storage configured; creatorID = %s", contents.CreatorID)
    66  	} else {
    67  		p.st.Logger.Infof("remote storage configured; no creatorID yet")
    68  	}
    69  
    70  	if p.st.Remote.CacheSizeBytes > 0 {
    71  		const defaultBlockSize = 32 * 1024
    72  		blockSize := p.st.Remote.CacheBlockSize
    73  		if blockSize == 0 {
    74  			blockSize = defaultBlockSize
    75  		}
    76  
    77  		const defaultShardingBlockSize = 1024 * 1024
    78  		shardingBlockSize := p.st.Remote.ShardingBlockSize
    79  		if shardingBlockSize == 0 {
    80  			shardingBlockSize = defaultShardingBlockSize
    81  		}
    82  
    83  		numShards := p.st.Remote.CacheShardCount
    84  		if numShards == 0 {
    85  			numShards = 2 * runtime.GOMAXPROCS(0)
    86  		}
    87  
    88  		p.remote.cache, err = sharedcache.Open(
    89  			p.st.FS, p.st.Logger, p.st.FSDirName, blockSize, shardingBlockSize, p.st.Remote.CacheSizeBytes, numShards)
    90  		if err != nil {
    91  			return errors.Wrapf(err, "pebble: could not open remote object cache")
    92  		}
    93  	}
    94  
    95  	for _, meta := range contents.Objects {
    96  		o := objstorage.ObjectMetadata{
    97  			DiskFileNum: meta.FileNum,
    98  			FileType:    meta.FileType,
    99  		}
   100  		o.Remote.CreatorID = meta.CreatorID
   101  		o.Remote.CreatorFileNum = meta.CreatorFileNum
   102  		o.Remote.CleanupMethod = meta.CleanupMethod
   103  		o.Remote.Locator = meta.Locator
   104  		o.Remote.CustomObjectName = meta.CustomObjectName
   105  		o.Remote.Storage, err = p.ensureStorageLocked(o.Remote.Locator)
   106  		if err != nil {
   107  			return errors.Wrapf(err, "creating remote.Storage object for locator '%s'", o.Remote.Locator)
   108  		}
   109  		if invariants.Enabled {
   110  			o.AssertValid()
   111  		}
   112  		p.mu.knownObjects[o.DiskFileNum] = o
   113  	}
   114  	return nil
   115  }
   116  
   117  // initShared initializes the creator ID, allowing use of shared objects.
   118  func (ss *remoteSubsystem) initShared(creatorID objstorage.CreatorID) {
   119  	ss.shared.initOnce.Do(func() {
   120  		ss.shared.creatorID = creatorID
   121  		ss.shared.initialized.Store(true)
   122  	})
   123  }
   124  
   125  func (p *provider) sharedClose() error {
   126  	if p.st.Remote.StorageFactory == nil {
   127  		return nil
   128  	}
   129  	var err error
   130  	if p.remote.cache != nil {
   131  		err = p.remote.cache.Close()
   132  		p.remote.cache = nil
   133  	}
   134  	if p.remote.catalog != nil {
   135  		err = firstError(err, p.remote.catalog.Close())
   136  		p.remote.catalog = nil
   137  	}
   138  	return err
   139  }
   140  
   141  // SetCreatorID is part of the objstorage.Provider interface.
   142  func (p *provider) SetCreatorID(creatorID objstorage.CreatorID) error {
   143  	if p.st.Remote.StorageFactory == nil {
   144  		return errors.AssertionFailedf("attempt to set CreatorID but remote storage not enabled")
   145  	}
   146  	// Note: this call is a cheap no-op if the creator ID was already set. This
   147  	// call also checks if we are trying to change the ID.
   148  	if err := p.remote.catalog.SetCreatorID(creatorID); err != nil {
   149  		return err
   150  	}
   151  	if !p.remote.shared.initialized.Load() {
   152  		p.st.Logger.Infof("remote storage creatorID set to %s", creatorID)
   153  		p.remote.initShared(creatorID)
   154  	}
   155  	return nil
   156  }
   157  
   158  // IsSharedForeign is part of the objstorage.Provider interface.
   159  func (p *provider) IsSharedForeign(meta objstorage.ObjectMetadata) bool {
   160  	if !p.remote.shared.initialized.Load() {
   161  		return false
   162  	}
   163  	return meta.IsShared() && (meta.Remote.CreatorID != p.remote.shared.creatorID)
   164  }
   165  
   166  func (p *provider) remoteCheckInitialized() error {
   167  	if p.st.Remote.StorageFactory == nil {
   168  		return errors.Errorf("remote object support not configured")
   169  	}
   170  	return nil
   171  }
   172  
   173  func (p *provider) sharedCheckInitialized() error {
   174  	if err := p.remoteCheckInitialized(); err != nil {
   175  		return err
   176  	}
   177  	if !p.remote.shared.initialized.Load() {
   178  		return errors.Errorf("remote object support not available: remote creator ID not yet set")
   179  	}
   180  	return nil
   181  }
   182  
   183  func (p *provider) sharedSync() error {
   184  	// Serialize parallel sync operations. Note that ApplyBatch is already
   185  	// serialized internally, but we want to make sure they get called with
   186  	// batches in the right order.
   187  	p.remote.catalogSyncMutex.Lock()
   188  	defer p.remote.catalogSyncMutex.Unlock()
   189  
   190  	batch := func() remoteobjcat.Batch {
   191  		p.mu.Lock()
   192  		defer p.mu.Unlock()
   193  		res := p.mu.remote.catalogBatch.Copy()
   194  		p.mu.remote.catalogBatch.Reset()
   195  		return res
   196  	}()
   197  
   198  	if batch.IsEmpty() {
   199  		return nil
   200  	}
   201  
   202  	if err := p.remote.catalog.ApplyBatch(batch); err != nil {
   203  		// Put back the batch (for the next Sync), appending any operations that
   204  		// happened in the meantime.
   205  		p.mu.Lock()
   206  		defer p.mu.Unlock()
   207  		batch.Append(p.mu.remote.catalogBatch)
   208  		p.mu.remote.catalogBatch = batch
   209  		return err
   210  	}
   211  
   212  	return nil
   213  }
   214  
   215  func (p *provider) remotePath(meta objstorage.ObjectMetadata) string {
   216  	if meta.Remote.Locator != "" {
   217  		return fmt.Sprintf("remote-%s://%s", meta.Remote.Locator, remoteObjectName(meta))
   218  	}
   219  	return "remote://" + remoteObjectName(meta)
   220  }
   221  
   222  // sharedCreateRef creates a reference marker object.
   223  func (p *provider) sharedCreateRef(meta objstorage.ObjectMetadata) error {
   224  	if err := p.sharedCheckInitialized(); err != nil {
   225  		return err
   226  	}
   227  	if meta.Remote.CleanupMethod != objstorage.SharedRefTracking {
   228  		return nil
   229  	}
   230  	refName := p.sharedObjectRefName(meta)
   231  	writer, err := meta.Remote.Storage.CreateObject(refName)
   232  	if err == nil {
   233  		// The object is empty, just close the writer.
   234  		err = writer.Close()
   235  	}
   236  	if err != nil {
   237  		return errors.Wrapf(err, "creating marker object %q", errors.Safe(refName))
   238  	}
   239  	return nil
   240  }
   241  
   242  func (p *provider) sharedCreate(
   243  	_ context.Context,
   244  	fileType base.FileType,
   245  	fileNum base.DiskFileNum,
   246  	locator remote.Locator,
   247  	opts objstorage.CreateOptions,
   248  ) (objstorage.Writable, objstorage.ObjectMetadata, error) {
   249  	if err := p.sharedCheckInitialized(); err != nil {
   250  		return nil, objstorage.ObjectMetadata{}, err
   251  	}
   252  	storage, err := p.ensureStorage(locator)
   253  	if err != nil {
   254  		return nil, objstorage.ObjectMetadata{}, err
   255  	}
   256  	meta := objstorage.ObjectMetadata{
   257  		DiskFileNum: fileNum,
   258  		FileType:    fileType,
   259  	}
   260  	meta.Remote.CreatorID = p.remote.shared.creatorID
   261  	meta.Remote.CreatorFileNum = fileNum
   262  	meta.Remote.CleanupMethod = opts.SharedCleanupMethod
   263  	meta.Remote.Locator = locator
   264  	meta.Remote.Storage = storage
   265  
   266  	objName := remoteObjectName(meta)
   267  	writer, err := storage.CreateObject(objName)
   268  	if err != nil {
   269  		return nil, objstorage.ObjectMetadata{}, errors.Wrapf(err, "creating object %q", errors.Safe(objName))
   270  	}
   271  	return &sharedWritable{
   272  		p:             p,
   273  		meta:          meta,
   274  		storageWriter: writer,
   275  	}, meta, nil
   276  }
   277  
   278  func (p *provider) remoteOpenForReading(
   279  	ctx context.Context, meta objstorage.ObjectMetadata, opts objstorage.OpenOptions,
   280  ) (objstorage.Readable, error) {
   281  	if err := p.remoteCheckInitialized(); err != nil {
   282  		return nil, err
   283  	}
   284  	// Verify we have a reference on this object; for performance reasons, we only
   285  	// do this in testing scenarios.
   286  	if p.remote.shared.checkRefsOnOpen && meta.Remote.CleanupMethod == objstorage.SharedRefTracking {
   287  		if err := p.sharedCheckInitialized(); err != nil {
   288  			return nil, err
   289  		}
   290  		refName := p.sharedObjectRefName(meta)
   291  		if _, err := meta.Remote.Storage.Size(refName); err != nil {
   292  			if meta.Remote.Storage.IsNotExistError(err) {
   293  				if opts.MustExist {
   294  					p.st.Logger.Fatalf("marker object %q does not exist", errors.Safe(refName))
   295  					// TODO(radu): maybe list references for the object.
   296  				}
   297  				return nil, errors.Errorf("marker object %q does not exist", errors.Safe(refName))
   298  			}
   299  			return nil, errors.Wrapf(err, "checking marker object %q", errors.Safe(refName))
   300  		}
   301  	}
   302  	objName := remoteObjectName(meta)
   303  	reader, size, err := meta.Remote.Storage.ReadObject(ctx, objName)
   304  	if err != nil {
   305  		if opts.MustExist && meta.Remote.Storage.IsNotExistError(err) {
   306  			p.st.Logger.Fatalf("object %q does not exist", redact.SafeString(objName))
   307  			// TODO(radu): maybe list references for the object.
   308  		}
   309  		return nil, err
   310  	}
   311  	return p.newRemoteReadable(reader, size, meta.DiskFileNum), nil
   312  }
   313  
   314  func (p *provider) remoteSize(meta objstorage.ObjectMetadata) (int64, error) {
   315  	if err := p.remoteCheckInitialized(); err != nil {
   316  		return 0, err
   317  	}
   318  	objName := remoteObjectName(meta)
   319  	return meta.Remote.Storage.Size(objName)
   320  }
   321  
   322  // sharedUnref implements object "removal" with the remote backend. The ref
   323  // marker object is removed and the backing object is removed only if there are
   324  // no other ref markers.
   325  func (p *provider) sharedUnref(meta objstorage.ObjectMetadata) error {
   326  	if meta.Remote.CleanupMethod == objstorage.SharedNoCleanup {
   327  		// Never delete objects in this mode.
   328  		return nil
   329  	}
   330  	if p.isProtected(meta.DiskFileNum) {
   331  		// TODO(radu): we need a mechanism to unref the object when it becomes
   332  		// unprotected.
   333  		return nil
   334  	}
   335  
   336  	refName := p.sharedObjectRefName(meta)
   337  	// Tolerate a not-exists error.
   338  	if err := meta.Remote.Storage.Delete(refName); err != nil && !meta.Remote.Storage.IsNotExistError(err) {
   339  		return err
   340  	}
   341  	otherRefs, err := meta.Remote.Storage.List(sharedObjectRefPrefix(meta), "" /* delimiter */)
   342  	if err != nil {
   343  		return err
   344  	}
   345  	if len(otherRefs) == 0 {
   346  		objName := remoteObjectName(meta)
   347  		if err := meta.Remote.Storage.Delete(objName); err != nil && !meta.Remote.Storage.IsNotExistError(err) {
   348  			return err
   349  		}
   350  	}
   351  	return nil
   352  }
   353  
   354  // ensureStorageLocked populates the remote.Storage object for the given
   355  // locator, if necessary. p.mu must be held.
   356  func (p *provider) ensureStorageLocked(locator remote.Locator) (remote.Storage, error) {
   357  	if p.mu.remote.storageObjects == nil {
   358  		p.mu.remote.storageObjects = make(map[remote.Locator]remote.Storage)
   359  	}
   360  	if res, ok := p.mu.remote.storageObjects[locator]; ok {
   361  		return res, nil
   362  	}
   363  	res, err := p.st.Remote.StorageFactory.CreateStorage(locator)
   364  	if err != nil {
   365  		return nil, err
   366  	}
   367  
   368  	p.mu.remote.storageObjects[locator] = res
   369  	return res, nil
   370  }
   371  
   372  // ensureStorage populates the remote.Storage object for the given locator, if necessary.
   373  func (p *provider) ensureStorage(locator remote.Locator) (remote.Storage, error) {
   374  	p.mu.Lock()
   375  	defer p.mu.Unlock()
   376  	return p.ensureStorageLocked(locator)
   377  }