github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/nbs/file_manifest.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // This file incorporates work covered by the following copyright and
    16  // permission notice:
    17  //
    18  // Copyright 2016 Attic Labs, Inc. All rights reserved.
    19  // Licensed under the Apache License, version 2.0:
    20  // http://www.apache.org/licenses/LICENSE-2.0
    21  
    22  package nbs
    23  
    24  import (
    25  	"context"
    26  	"errors"
    27  	"fmt"
    28  	"io"
    29  	"os"
    30  	"path/filepath"
    31  	"strings"
    32  	"time"
    33  
    34  	"github.com/dolthub/fslock"
    35  
    36  	"github.com/dolthub/dolt/go/libraries/utils/file"
    37  	"github.com/dolthub/dolt/go/store/chunks"
    38  	"github.com/dolthub/dolt/go/store/hash"
    39  	"github.com/dolthub/dolt/go/store/util/tempfiles"
    40  )
    41  
const (
	// manifestFileName is the name of the manifest file inside a store directory.
	manifestFileName = "manifest"
	// lockFileName is the name of the file in the store directory that the
	// fslock used by fileManifest is created on.
	lockFileName     = "LOCK"
	// lockFileTimeout bounds how long tryFileLock waits to acquire the file lock.
	lockFileTimeout  = time.Millisecond * 100

	// storageVersion4 is the manifest version string that parseManifest
	// dispatches to parseV4Manifest.
	storageVersion4 = "4"

	// prefixLen is the number of fixed fields writeManifest emits ahead of the
	// table specs: storage version, nbf version, lock, root and gcGen.
	prefixLen = 5
)

// ErrUnreadableManifest is returned by getFileManifest when the manifest file
// could be opened but ParseIfExists reported that no manifest exists.
var ErrUnreadableManifest = errors.New("could not read file manifest")

// manifestChecker validates a proposed manifest update. updateWithChecker
// calls it with the manifest currently on disk (|upstream|) and the proposed
// replacement (|contents|); a non-nil error aborts the update.
type manifestChecker func(upstream, contents manifestContents) error
    55  
    56  // ParseManifest parses a manifest file from the supplied reader
    57  func ParseManifest(r io.Reader) (ManifestInfo, error) {
    58  	return parseManifest(r)
    59  }
    60  
    61  func MaybeMigrateFileManifest(ctx context.Context, dir string) (bool, error) {
    62  	_, err := os.Stat(filepath.Join(dir, manifestFileName))
    63  	if os.IsNotExist(err) {
    64  		// no manifest exists, no need to migrate
    65  		return false, nil
    66  	} else if err != nil {
    67  		return false, err
    68  	}
    69  
    70  	_, contents, err := parseIfExists(ctx, dir, nil)
    71  	if err != nil {
    72  		return false, err
    73  	}
    74  
    75  	if contents.manifestVers == StorageVersion {
    76  		// already on v5, no need to migrate
    77  		return false, nil
    78  	}
    79  
    80  	check := func(_, contents manifestContents) error {
    81  		if !contents.gcGen.IsEmpty() {
    82  			return errors.New("migrating from v4 to v5 should result in a manifest with a 0 gcGen")
    83  		}
    84  
    85  		return nil
    86  	}
    87  
    88  	_, err = updateWithChecker(ctx, dir, syncFlush, check, contents.lock, contents, nil)
    89  
    90  	if err != nil {
    91  		return false, err
    92  	}
    93  
    94  	return true, err
    95  }
    96  
    97  // getFileManifest makes a new file manifest.
    98  func getFileManifest(ctx context.Context, dir string, mode updateMode) (m manifest, err error) {
    99  	lock := fslock.New(filepath.Join(dir, lockFileName))
   100  	m = fileManifest{dir: dir, mode: mode, lock: lock}
   101  
   102  	var f *os.File
   103  	f, err = openIfExists(filepath.Join(dir, manifestFileName))
   104  	if err != nil {
   105  		return nil, err
   106  	} else if f == nil {
   107  		return m, nil
   108  	}
   109  	defer func() {
   110  		// keep first error
   111  		if cerr := f.Close(); err == nil {
   112  			err = cerr
   113  		}
   114  	}()
   115  
   116  	var ok bool
   117  	ok, _, err = m.ParseIfExists(ctx, &Stats{}, nil)
   118  	if err != nil {
   119  		return nil, err
   120  	} else if !ok {
   121  		return nil, ErrUnreadableManifest
   122  	}
   123  	return
   124  }
   125  
// updateMode selects whether updateWithChecker flushes to disk as part of a
// manifest update.
type updateMode byte

const (
	// asyncFlush skips the explicit file and directory syncs.
	asyncFlush updateMode = 0
	// syncFlush makes updateWithChecker sync the temporary manifest file
	// before the rename and sync the directory handle after it.
	syncFlush  updateMode = 1
)
   132  
// fileManifest is a manifest stored as a file named manifestFileName inside
// a directory.
type fileManifest struct {
	// dir is the directory that holds the manifest and lock files.
	dir  string
	// mode is the updateMode passed to updateWithChecker on every update.
	mode updateMode
	// lock is the file lock taken by Update and UpdateGCGen for the duration
	// of a write.
	lock *fslock.Lock
}
   138  
   139  // Returns nil if path does not exist
   140  func openIfExists(path string) (*os.File, error) {
   141  	f, err := os.Open(path)
   142  	if os.IsNotExist(err) {
   143  		return nil, nil
   144  	} else if err != nil {
   145  		return nil, err
   146  	}
   147  	return f, err
   148  }
   149  
// Name returns the directory this manifest is stored in.
func (fm fileManifest) Name() string {
	return fm.dir
}
   153  
   154  // ParseIfExists looks for a LOCK and manifest file in fm.dir. If it finds
   155  // them, it takes the lock, parses the manifest and returns its contents,
   156  // setting |exists| to true. If not, it sets |exists| to false and returns. In
   157  // that case, the other return values are undefined. If |readHook| is non-nil,
   158  // it will be executed while ParseIfExists() holds the manifest file lock.
   159  // This is to allow for race condition testing.
   160  func (fm fileManifest) ParseIfExists(
   161  	ctx context.Context,
   162  	stats *Stats,
   163  	readHook func() error,
   164  ) (exists bool, contents manifestContents, err error) {
   165  	t1 := time.Now()
   166  	defer func() { stats.ReadManifestLatency.SampleTimeSince(t1) }()
   167  
   168  	// no file lock on the read path
   169  	return parseIfExists(ctx, fm.dir, readHook)
   170  }
   171  
   172  func (fm fileManifest) Update(ctx context.Context, lastLock hash.Hash, newContents manifestContents, stats *Stats, writeHook func() error) (mc manifestContents, err error) {
   173  	t1 := time.Now()
   174  	defer func() { stats.WriteManifestLatency.SampleTimeSince(t1) }()
   175  
   176  	// hold the file lock while we update
   177  	if err = tryFileLock(fm.lock); err != nil {
   178  		return manifestContents{}, err
   179  	}
   180  	defer func() {
   181  		if cerr := fm.lock.Unlock(); err == nil {
   182  			err = cerr // keep first error
   183  		}
   184  	}()
   185  
   186  	checker := func(upstream, contents manifestContents) error {
   187  		if contents.gcGen != upstream.gcGen {
   188  			return chunks.ErrGCGenerationExpired
   189  		}
   190  		return nil
   191  	}
   192  
   193  	return updateWithChecker(ctx, fm.dir, fm.mode, checker, lastLock, newContents, writeHook)
   194  }
   195  
   196  func (fm fileManifest) UpdateGCGen(ctx context.Context, lastLock hash.Hash, newContents manifestContents, stats *Stats, writeHook func() error) (mc manifestContents, err error) {
   197  	t1 := time.Now()
   198  	defer func() { stats.WriteManifestLatency.SampleTimeSince(t1) }()
   199  
   200  	// hold the file lock while we update
   201  	if err = tryFileLock(fm.lock); err != nil {
   202  		return manifestContents{}, err
   203  	}
   204  	defer func() {
   205  		if cerr := fm.lock.Unlock(); err == nil {
   206  			err = cerr // keep first error
   207  		}
   208  	}()
   209  
   210  	checker := func(upstream, contents manifestContents) error {
   211  		if contents.gcGen == upstream.gcGen {
   212  			return errors.New("UpdateGCGen() must update the garbage collection generation")
   213  		} else if contents.root != upstream.root {
   214  			return errors.New("UpdateGCGen() cannot update the root")
   215  		}
   216  		return nil
   217  	}
   218  
   219  	return updateWithChecker(ctx, fm.dir, fm.mode, checker, lastLock, newContents, writeHook)
   220  }
   221  
   222  // parseV5Manifest parses the v5 manifest from the Reader given. Assumes the first field (the manifest version and
   223  // following : character) have already been consumed by the reader.
   224  //
   225  // |-- String --|-- String --|-------- String --------|-------- String --------|-------- String -----------------|
   226  // | nbs version:Noms version:Base32-encoded lock hash:Base32-encoded root hash:Base32-encoded GC generation hash
   227  //
   228  // |-- String --|- String --|...|-- String --|- String --|
   229  // :table 1 hash:table 1 cnt:...:table N hash:table N cnt|
   230  func parseV5Manifest(r io.Reader) (manifestContents, error) {
   231  	manifest, err := io.ReadAll(r)
   232  
   233  	if err != nil {
   234  		return manifestContents{}, err
   235  	}
   236  
   237  	slices := strings.Split(string(manifest), ":")
   238  	if len(slices) < prefixLen-1 || len(slices)%2 != 0 {
   239  		return manifestContents{}, ErrCorruptManifest
   240  	}
   241  
   242  	specs, err := parseSpecs(slices[prefixLen-1:])
   243  	if err != nil {
   244  		return manifestContents{}, err
   245  	}
   246  
   247  	lock, ok := hash.MaybeParse(slices[1])
   248  	if !ok {
   249  		return manifestContents{}, fmt.Errorf("Could not parse lock hash: %s", slices[1])
   250  	}
   251  
   252  	gcGen, ok := hash.MaybeParse(slices[3])
   253  	if !ok {
   254  		return manifestContents{}, fmt.Errorf("Could not parse GC generation hash: %s", slices[3])
   255  	}
   256  
   257  	return manifestContents{
   258  		manifestVers: StorageVersion,
   259  		nbfVers:      slices[0],
   260  		lock:         lock,
   261  		root:         hash.Parse(slices[2]),
   262  		gcGen:        gcGen,
   263  		specs:        specs,
   264  	}, nil
   265  }
   266  
   267  // parseManifest parses the manifest bytes in the reader given and returns the contents. Consumes the first few bytes
   268  func parseManifest(r io.Reader) (manifestContents, error) {
   269  	var version []byte
   270  	buf := make([]byte, 1)
   271  
   272  	// Parse the manifest up to the : character
   273  	chars := 0
   274  	for ; chars < 8; chars++ {
   275  		_, err := r.Read(buf)
   276  		if err != nil {
   277  			return manifestContents{}, err
   278  		}
   279  		if buf[0] == ':' {
   280  			break
   281  		}
   282  		version = append(version, buf[0])
   283  	}
   284  	if chars >= 8 {
   285  		return manifestContents{}, ErrCorruptManifest
   286  	}
   287  
   288  	switch string(version) {
   289  	case storageVersion4:
   290  		return parseV4Manifest(r)
   291  	case StorageVersion:
   292  		return parseV5Manifest(r)
   293  	default:
   294  		return manifestContents{}, fmt.Errorf("Unknown manifest version: %s. You may need to update your client", string(version))
   295  	}
   296  }
   297  
   298  func writeManifest(temp io.Writer, contents manifestContents) error {
   299  	strs := make([]string, 2*len(contents.specs)+prefixLen)
   300  	strs[0], strs[1], strs[2], strs[3], strs[4] = StorageVersion, contents.nbfVers, contents.lock.String(), contents.root.String(), contents.gcGen.String()
   301  	tableInfo := strs[prefixLen:]
   302  	formatSpecs(contents.specs, tableInfo)
   303  	_, err := io.WriteString(temp, strings.Join(strs, ":"))
   304  
   305  	return err
   306  }
   307  
   308  // parseV4Manifest parses the v4 manifest from the Reader given. Assumes the first field (the manifest version and
   309  // following : character) have already been consumed by the reader.
   310  //
   311  // |-- String --|-- String --|-------- String --------|-------- String --------|-- String --|- String --|...|-- String --|- String --|
   312  // | nbs version:Noms version:Base32-encoded lock hash:Base32-encoded root hash:table 1 hash:table 1 cnt:...:table N hash:table N cnt|
   313  func parseV4Manifest(r io.Reader) (manifestContents, error) {
   314  	manifest, err := io.ReadAll(r)
   315  
   316  	if err != nil {
   317  		return manifestContents{}, err
   318  	}
   319  
   320  	slices := strings.Split(string(manifest), ":")
   321  	if len(slices) < 3 || len(slices)%2 == 0 {
   322  		return manifestContents{}, ErrCorruptManifest
   323  	}
   324  
   325  	specs, err := parseSpecs(slices[3:])
   326  
   327  	if err != nil {
   328  		return manifestContents{}, err
   329  	}
   330  
   331  	ad, ok := hash.MaybeParse(slices[1])
   332  	if !ok {
   333  		return manifestContents{}, fmt.Errorf("Could not parse lock hash: %s", slices[1])
   334  	}
   335  
   336  	return manifestContents{
   337  		manifestVers: storageVersion4,
   338  		nbfVers:      slices[0],
   339  		lock:         ad,
   340  		root:         hash.Parse(slices[2]),
   341  		specs:        specs,
   342  	}, nil
   343  }
   344  
   345  // parseIfExists parses the manifest file if it exists, callers must hold the file lock.
   346  func parseIfExists(_ context.Context, dir string, readHook func() error) (exists bool, contents manifestContents, err error) {
   347  	if readHook != nil {
   348  		if err = readHook(); err != nil {
   349  			return false, manifestContents{}, err
   350  		}
   351  	}
   352  
   353  	var f *os.File
   354  	if f, err = openIfExists(filepath.Join(dir, manifestFileName)); err != nil {
   355  		return false, manifestContents{}, err
   356  	} else if f == nil {
   357  		return false, manifestContents{}, nil
   358  	}
   359  	defer func() {
   360  		if cerr := f.Close(); err == nil {
   361  			err = cerr // keep first error
   362  		}
   363  	}()
   364  
   365  	contents, err = parseManifest(f)
   366  	if err != nil {
   367  		return false, contents, err
   368  	}
   369  	exists = true
   370  	return
   371  }
   372  
   373  // updateWithChecker updates the manifest if |validate| is satisfied, callers must hold the file lock.
   374  func updateWithChecker(_ context.Context, dir string, mode updateMode, validate manifestChecker, lastLock hash.Hash, newContents manifestContents, writeHook func() error) (mc manifestContents, err error) {
   375  	var tempManifestPath string
   376  
   377  	// Write a temporary manifest file, to be renamed over manifestFileName upon success.
   378  	// The closure here ensures this file is closed before moving on.
   379  	tempManifestPath, err = func() (name string, ferr error) {
   380  		var temp *os.File
   381  		temp, ferr = tempfiles.MovableTempFileProvider.NewFile(dir, "nbs_manifest_")
   382  		if ferr != nil {
   383  			return "", ferr
   384  		}
   385  
   386  		defer func() {
   387  			closeErr := temp.Close()
   388  
   389  			if ferr == nil {
   390  				ferr = closeErr
   391  			}
   392  		}()
   393  
   394  		ferr = writeManifest(temp, newContents)
   395  		if ferr != nil {
   396  			return "", ferr
   397  		}
   398  
   399  		if mode == syncFlush {
   400  			if ferr = temp.Sync(); ferr != nil {
   401  				return "", ferr
   402  			}
   403  		}
   404  
   405  		return temp.Name(), nil
   406  	}()
   407  
   408  	if err != nil {
   409  		return manifestContents{}, err
   410  	}
   411  
   412  	defer file.Remove(tempManifestPath) // If we rename below, this will be a no-op
   413  
   414  	// writeHook is for testing, allowing other code to slip in and try to do stuff while we hold the lock.
   415  	if writeHook != nil {
   416  		err = writeHook()
   417  
   418  		if err != nil {
   419  			return manifestContents{}, err
   420  		}
   421  	}
   422  
   423  	var upstream manifestContents
   424  	// Read current manifest (if it exists). The closure ensures that the file is closed before moving on, so we can rename over it later if need be.
   425  	manifestPath := filepath.Join(dir, manifestFileName)
   426  	upstream, err = func() (upstream manifestContents, ferr error) {
   427  		if f, ferr := openIfExists(manifestPath); ferr == nil && f != nil {
   428  			defer func() {
   429  				closeErr := f.Close()
   430  
   431  				if ferr == nil {
   432  					ferr = closeErr
   433  				}
   434  			}()
   435  
   436  			upstream, ferr = parseManifest(f)
   437  
   438  			if ferr != nil {
   439  				return manifestContents{}, ferr
   440  			}
   441  
   442  			if newContents.nbfVers != upstream.nbfVers {
   443  				return manifestContents{}, errors.New("Update cannot change manifest version")
   444  			}
   445  
   446  			return upstream, nil
   447  		} else if ferr != nil {
   448  			return manifestContents{}, ferr
   449  		}
   450  
   451  		if !lastLock.IsEmpty() {
   452  			return manifestContents{}, errors.New("new manifest created with non 0 lock")
   453  		}
   454  
   455  		return manifestContents{}, nil
   456  	}()
   457  
   458  	if err != nil {
   459  		return manifestContents{}, err
   460  	}
   461  
   462  	if lastLock != upstream.lock {
   463  		return upstream, nil
   464  	}
   465  
   466  	// this is where we assert that gcGen is correct
   467  	err = validate(upstream, newContents)
   468  
   469  	if err != nil {
   470  		return manifestContents{}, err
   471  	}
   472  
   473  	err = file.Rename(tempManifestPath, manifestPath)
   474  	if err != nil {
   475  		return manifestContents{}, err
   476  	}
   477  
   478  	if mode == syncFlush {
   479  		if err = file.SyncDirectoryHandle(dir); err != nil {
   480  			return manifestContents{}, err
   481  		}
   482  	}
   483  
   484  	return newContents, nil
   485  }
   486  
   487  func tryFileLock(lock *fslock.Lock) (err error) {
   488  	err = lock.LockWithTimeout(lockFileTimeout)
   489  	if errors.Is(err, fslock.ErrTimeout) {
   490  		err = errors.New("timed out reading database manifest")
   491  	}
   492  	return
   493  }