github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/replica_sideload_disk.go (about)

     1  // Copyright 2017 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package kvserver
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"os"
    17  	"path/filepath"
    18  	"strconv"
    19  	"strings"
    20  
    21  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    22  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    23  	"github.com/cockroachdb/cockroach/pkg/storage"
    24  	"github.com/cockroachdb/errors"
    25  	"golang.org/x/time/rate"
    26  )
    27  
    28  var _ SideloadStorage = &diskSideloadStorage{}
    29  
    30  type diskSideloadStorage struct {
    31  	st         *cluster.Settings
    32  	limiter    *rate.Limiter
    33  	dir        string
    34  	dirCreated bool
    35  	eng        storage.Engine
    36  }
    37  
    38  func deprecatedSideloadedPath(
    39  	baseDir string, rangeID roachpb.RangeID, replicaID roachpb.ReplicaID,
    40  ) string {
    41  	return filepath.Join(
    42  		baseDir,
    43  		"sideloading",
    44  		fmt.Sprintf("%d", rangeID%1000), // sharding
    45  		fmt.Sprintf("%d.%d", rangeID, replicaID),
    46  	)
    47  }
    48  
    49  func sideloadedPath(baseDir string, rangeID roachpb.RangeID) string {
    50  	// Use one level of sharding to avoid too many items per directory. For
    51  	// example, ext3 and older ext4 support only 32k and 64k subdirectories
    52  	// per directory, respectively. Newer FS typically have no such limitation,
    53  	// but still.
    54  	//
    55  	// For example, r1828 will end up in baseDir/r1XXX/r1828.
    56  	return filepath.Join(
    57  		baseDir,
    58  		"sideloading",
    59  		fmt.Sprintf("r%dXXXX", rangeID/10000), // sharding
    60  		fmt.Sprintf("r%d", rangeID),
    61  	)
    62  }
    63  
    64  func exists(path string) (bool, error) {
    65  	_, err := os.Stat(path)
    66  	if err == nil {
    67  		return true, nil
    68  	}
    69  	if os.IsNotExist(err) {
    70  		return false, nil
    71  	}
    72  	return false, err
    73  }
    74  
    75  func newDiskSideloadStorage(
    76  	st *cluster.Settings,
    77  	rangeID roachpb.RangeID,
    78  	replicaID roachpb.ReplicaID,
    79  	baseDir string,
    80  	limiter *rate.Limiter,
    81  	eng storage.Engine,
    82  ) (*diskSideloadStorage, error) {
    83  	path := deprecatedSideloadedPath(baseDir, rangeID, replicaID)
    84  	newPath := sideloadedPath(baseDir, rangeID)
    85  	// NB: this call to exists() is in the hot path when the server starts
    86  	// as it will be called once for each replica. However, during steady
    87  	// state (i.e. when the version variable hasn't *just* flipped), we're
    88  	// expecting `path` to not exist (since it refers to the legacy path at
    89  	// the moment). A stat call for a directory that doesn't exist isn't
    90  	// very expensive (on the order of 1000s of ns). For example, on a 2017
    91  	// MacBook Pro, this case averages ~3245ns and on a gceworker it's
    92  	// ~1200ns. At 50k replicas, that's on the order of a tenth of a second;
    93  	// not enough to matter.
    94  	//
    95  	// On the other hand, successful (i.e. directory found) calls take ~23k
    96  	// ns on my laptop, but only around 2.2k ns on the gceworker. Still,
    97  	// even on the laptop, 50k replicas would only add 1.2s which is also
    98  	// acceptable given that it'll happen only once.
    99  	exists, err := exists(path)
   100  	if err != nil {
   101  		return nil, errors.Wrap(err, "checking pre-migration sideloaded directory")
   102  	}
   103  	if exists {
   104  		if err := os.MkdirAll(filepath.Dir(newPath), 0755); err != nil {
   105  			return nil, errors.Wrap(err, "creating migrated sideloaded directory")
   106  		}
   107  		if err := os.Rename(path, newPath); err != nil {
   108  			return nil, errors.Wrap(err, "while migrating sideloaded directory")
   109  		}
   110  	}
   111  	path = newPath
   112  
   113  	ss := &diskSideloadStorage{
   114  		dir:     path,
   115  		eng:     eng,
   116  		st:      st,
   117  		limiter: limiter,
   118  	}
   119  	return ss, nil
   120  }
   121  
   122  func (ss *diskSideloadStorage) createDir() error {
   123  	err := os.MkdirAll(ss.dir, 0755)
   124  	ss.dirCreated = ss.dirCreated || err == nil
   125  	return err
   126  }
   127  
   128  // Dir implements SideloadStorage.
   129  func (ss *diskSideloadStorage) Dir() string {
   130  	return ss.dir
   131  }
   132  
   133  // Put implements SideloadStorage.
   134  func (ss *diskSideloadStorage) Put(ctx context.Context, index, term uint64, contents []byte) error {
   135  	filename := ss.filename(ctx, index, term)
   136  	// There's a chance the whole path is missing (for example after Clear()),
   137  	// in which case handle that transparently.
   138  	for {
   139  		// Use 0644 since that's what RocksDB uses:
   140  		// https://github.com/facebook/rocksdb/blob/56656e12d67d8a63f1e4c4214da9feeec2bd442b/env/env_posix.cc#L171
   141  		if err := writeFileSyncing(ctx, filename, contents, ss.eng, 0644, ss.st, ss.limiter); err == nil {
   142  			return nil
   143  		} else if !os.IsNotExist(err) {
   144  			return err
   145  		}
   146  		// createDir() ensures ss.dir exists but will not create any subdirectories
   147  		// within ss.dir because filename() does not make subdirectories in ss.dir.
   148  		if err := ss.createDir(); err != nil {
   149  			return err
   150  		}
   151  		continue
   152  	}
   153  }
   154  
   155  // Get implements SideloadStorage.
   156  func (ss *diskSideloadStorage) Get(ctx context.Context, index, term uint64) ([]byte, error) {
   157  	filename := ss.filename(ctx, index, term)
   158  	b, err := ss.eng.ReadFile(filename)
   159  	if os.IsNotExist(err) {
   160  		return nil, errSideloadedFileNotFound
   161  	}
   162  	return b, err
   163  }
   164  
   165  // Filename implements SideloadStorage.
   166  func (ss *diskSideloadStorage) Filename(ctx context.Context, index, term uint64) (string, error) {
   167  	return ss.filename(ctx, index, term), nil
   168  }
   169  
   170  func (ss *diskSideloadStorage) filename(ctx context.Context, index, term uint64) string {
   171  	return filepath.Join(ss.dir, fmt.Sprintf("i%d.t%d", index, term))
   172  }
   173  
   174  // Purge implements SideloadStorage.
   175  func (ss *diskSideloadStorage) Purge(ctx context.Context, index, term uint64) (int64, error) {
   176  	return ss.purgeFile(ctx, ss.filename(ctx, index, term))
   177  }
   178  
   179  func (ss *diskSideloadStorage) fileSize(filename string) (int64, error) {
   180  	// TODO(tschottdorf): this should all be done through the env. As written,
   181  	// the sizes returned here will be wrong if encryption is on. We want the
   182  	// size of the unencrypted payload.
   183  	//
   184  	// See #31913.
   185  	info, err := os.Stat(filename)
   186  	if err != nil {
   187  		if os.IsNotExist(err) {
   188  			return 0, errSideloadedFileNotFound
   189  		}
   190  		return 0, err
   191  	}
   192  	return info.Size(), nil
   193  }
   194  
   195  func (ss *diskSideloadStorage) purgeFile(ctx context.Context, filename string) (int64, error) {
   196  	size, err := ss.fileSize(filename)
   197  	if err != nil {
   198  		return 0, err
   199  	}
   200  	if err := ss.eng.Remove(filename); err != nil {
   201  		if os.IsNotExist(err) {
   202  			return 0, errSideloadedFileNotFound
   203  		}
   204  		return 0, err
   205  	}
   206  	return size, nil
   207  }
   208  
   209  // Clear implements SideloadStorage.
   210  func (ss *diskSideloadStorage) Clear(_ context.Context) error {
   211  	// TODO(jackson): Update this and the rest of `os.` filesystem calls in
   212  	// this impl to use ss.eng.
   213  	err := os.RemoveAll(ss.dir)
   214  	ss.dirCreated = ss.dirCreated && err != nil
   215  	return err
   216  }
   217  
   218  // TruncateTo implements SideloadStorage.
   219  func (ss *diskSideloadStorage) TruncateTo(
   220  	ctx context.Context, firstIndex uint64,
   221  ) (bytesFreed, bytesRetained int64, _ error) {
   222  	deletedAll := true
   223  	if err := ss.forEach(ctx, func(index uint64, filename string) error {
   224  		if index >= firstIndex {
   225  			size, err := ss.fileSize(filename)
   226  			if err != nil {
   227  				return err
   228  			}
   229  			bytesRetained += size
   230  			deletedAll = false
   231  			return nil
   232  		}
   233  		fileSize, err := ss.purgeFile(ctx, filename)
   234  		if err != nil {
   235  			return err
   236  		}
   237  		bytesFreed += fileSize
   238  		return nil
   239  	}); err != nil {
   240  		return 0, 0, err
   241  	}
   242  
   243  	if deletedAll {
   244  		// The directory may not exist, or it may exist and have been empty.
   245  		// Not worth trying to figure out which one, just try to delete.
   246  		err := os.Remove(ss.dir)
   247  		if !os.IsNotExist(err) {
   248  			return bytesFreed, 0, errors.Wrapf(err, "while purging %q", ss.dir)
   249  		}
   250  	}
   251  	return bytesFreed, bytesRetained, nil
   252  }
   253  
   254  func (ss *diskSideloadStorage) forEach(
   255  	ctx context.Context, visit func(index uint64, filename string) error,
   256  ) error {
   257  	matches, err := filepath.Glob(filepath.Join(ss.dir, "i*.t*"))
   258  	if err != nil {
   259  		return err
   260  	}
   261  	for _, match := range matches {
   262  		base := filepath.Base(match)
   263  		if len(base) < 1 || base[0] != 'i' {
   264  			continue
   265  		}
   266  		base = base[1:]
   267  		upToDot := strings.SplitN(base, ".", 2)
   268  		logIdx, err := strconv.ParseUint(upToDot[0], 10, 64)
   269  		if err != nil {
   270  			return errors.Wrapf(err, "while parsing %q during TruncateTo", match)
   271  		}
   272  		if err := visit(logIdx, match); err != nil {
   273  			return errors.Wrapf(err, "matching pattern %q", match)
   274  		}
   275  	}
   276  	return nil
   277  }
   278  
   279  // String lists the files in the storage without guaranteeing an ordering.
   280  func (ss *diskSideloadStorage) String() string {
   281  	var buf strings.Builder
   282  	var count int
   283  	if err := ss.forEach(context.Background(), func(_ uint64, filename string) error {
   284  		count++
   285  		_, _ = fmt.Fprintln(&buf, filename)
   286  		return nil
   287  	}); err != nil {
   288  		return err.Error()
   289  	}
   290  	fmt.Fprintf(&buf, "(%d files)\n", count)
   291  	return buf.String()
   292  }