github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/nbs/file_manifest.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // This file incorporates work covered by the following copyright and
    16  // permission notice:
    17  //
    18  // Copyright 2016 Attic Labs, Inc. All rights reserved.
    19  // Licensed under the Apache License, version 2.0:
    20  // http://www.apache.org/licenses/LICENSE-2.0
    21  
    22  package nbs
    23  
    24  import (
    25  	"context"
    26  	"errors"
    27  	"io"
    28  	"io/ioutil"
    29  	"os"
    30  	"path/filepath"
    31  	"strings"
    32  	"time"
    33  
    34  	"github.com/dolthub/fslock"
    35  
    36  	"github.com/dolthub/dolt/go/store/chunks"
    37  	"github.com/dolthub/dolt/go/store/hash"
    38  	"github.com/dolthub/dolt/go/store/util/tempfiles"
    39  )
    40  
const (
	// manifestFileName is the name of the manifest file inside a store directory.
	manifestFileName = "manifest"
	// lockFileName is the name of the lock file that lives next to the manifest.
	lockFileName     = "LOCK"

	// storageVersion4 is the storage-version tag found in the first field of a
	// v4 manifest.
	storageVersion4 = "4"

	// prefixLen is the number of leading fields in a v5 manifest that precede
	// the table spec pairs.
	prefixLen = 5
)

// ErrUnreadableManifest is returned when a manifest file exists but can be
// parsed as neither the v4 nor the v5 format.
var ErrUnreadableManifest = errors.New("could not read file manifest")

// manifestParser decodes manifest contents from r.
type manifestParser func(r io.Reader) (manifestContents, error)
// manifestWriter encodes contents into temp.
type manifestWriter func(temp io.Writer, contents manifestContents) error
// manifestChecker compares the manifest currently on disk (upstream) with the
// proposed new contents; a non-nil error aborts the update.
type manifestChecker func(upstream, contents manifestContents) error
    55  
    56  // ParseManifest parses s a manifest file from the supplied reader
    57  func ParseManifest(r io.Reader) (ManifestInfo, error) {
    58  	fm4 := fileManifestV4{}
    59  	return fm4.parseManifest(r)
    60  }
    61  
    62  func MaybeMigrateFileManifest(ctx context.Context, dir string) (bool, error) {
    63  	_, err := os.Stat(filepath.Join(dir, manifestFileName))
    64  	if os.IsNotExist(err) {
    65  		// no manifest exists, no need to migrate
    66  		return false, nil
    67  	} else if err != nil {
    68  		return false, err
    69  	}
    70  
    71  	fm5 := fileManifestV5{dir}
    72  	ok, _, err := fm5.ParseIfExists(ctx, &Stats{}, nil)
    73  	if ok && err == nil {
    74  		// on v5, no need to migrate
    75  		return false, nil
    76  	}
    77  
    78  	fm4 := fileManifestV4{dir}
    79  	ok, contents, err := fm4.ParseIfExists(ctx, &Stats{}, nil)
    80  	if err != nil {
    81  		return false, err
    82  	}
    83  	if !ok {
    84  		// expected v4 or v5
    85  		return false, ErrUnreadableManifest
    86  	}
    87  
    88  	check := func(upstream, contents manifestContents) error {
    89  		if upstream.gcGen == contents.gcGen {
    90  			return errors.New("error migrating manifest")
    91  		}
    92  		return nil
    93  	}
    94  	contents.gcGen = contents.lock
    95  
    96  	_, err = updateWithParseWriterAndChecker(ctx, dir, fm5.writeManifest, fm4.parseManifest, check, contents.lock, contents, nil)
    97  
    98  	if err != nil {
    99  		return false, err
   100  	}
   101  
   102  	return true, err
   103  }
   104  
   105  // parse the manifest in its given format
   106  func getFileManifest(ctx context.Context, dir string) (manifest, error) {
   107  	f, err := openIfExists(filepath.Join(dir, manifestFileName))
   108  	if err != nil {
   109  		return nil, err
   110  	}
   111  	if f == nil {
   112  		// initialize empty repos with v4
   113  		return fileManifestV4{dir}, nil
   114  	}
   115  	defer func() {
   116  		err = f.Close()
   117  	}()
   118  
   119  	fm5 := fileManifestV5{dir}
   120  	ok, _, err := fm5.ParseIfExists(ctx, &Stats{}, nil)
   121  	if ok && err == nil {
   122  		return fm5, nil
   123  	}
   124  
   125  	fm4 := fileManifestV4{dir}
   126  	ok, _, err = fm4.ParseIfExists(ctx, &Stats{}, nil)
   127  	if ok && err == nil {
   128  		return fm4, nil
   129  	}
   130  
   131  	return nil, ErrUnreadableManifest
   132  }
   133  
// fileManifestV5 provides access to a NomsBlockStore manifest stored on disk in |dir|. The format
// is currently human readable: a single line of colon-separated strings. The prefix contains 5
// strings, followed by pairs of table file hashes and their counts:
//
// |-- String --|-- String --|-------- String --------|-------- String --------|-------- String -----------------|
// | nbs version:Noms version:Base32-encoded lock hash:Base32-encoded root hash:Base32-encoded GC generation hash
//
// |-- String --|- String --|...|-- String --|- String --|
// :table 1 hash:table 1 cnt:...:table N hash:table N cnt|
type fileManifestV5 struct {
	// dir is the directory that holds the manifest and its lock file.
	dir string
}

// Compile-time check that fileManifestV5 satisfies manifestVersionGetter.
var _ manifestVersionGetter = &fileManifestV5{}
   148  
   149  func newLock(dir string) *fslock.Lock {
   150  	lockPath := filepath.Join(dir, lockFileName)
   151  	return fslock.New(lockPath)
   152  }
   153  
   154  func lockFileExists(dir string) (bool, error) {
   155  	lockPath := filepath.Join(dir, lockFileName)
   156  	info, err := os.Stat(lockPath)
   157  
   158  	if err != nil {
   159  		if os.IsNotExist(err) {
   160  			return false, nil
   161  		}
   162  
   163  		return false, errors.New("failed to determine if lock file exists")
   164  	} else if info.IsDir() {
   165  		return false, errors.New("lock file is a directory")
   166  	}
   167  
   168  	return true, nil
   169  }
   170  
   171  // Returns nil if path does not exist
   172  func openIfExists(path string) (*os.File, error) {
   173  	f, err := os.Open(path)
   174  	if os.IsNotExist(err) {
   175  		return nil, nil
   176  	} else if err != nil {
   177  		return nil, err
   178  	}
   179  
   180  	return f, err
   181  }
   182  
// Name returns the directory this manifest is stored in.
func (fm5 fileManifestV5) Name() string {
	return fm5.dir
}
   186  
// GetManifestVersion returns the manifest format version, "5".
func (fm5 fileManifestV5) GetManifestVersion() string {
	return "5"
}
   190  
   191  // ParseIfExists looks for a LOCK and manifest file in fm.dir. If it finds
   192  // them, it takes the lock, parses the manifest and returns its contents,
   193  // setting |exists| to true. If not, it sets |exists| to false and returns. In
   194  // that case, the other return values are undefined. If |readHook| is non-nil,
   195  // it will be executed while ParseIfExists() holds the manifest file lock.
   196  // This is to allow for race condition testing.
   197  func (fm5 fileManifestV5) ParseIfExists(ctx context.Context, stats *Stats, readHook func() error) (exists bool, contents manifestContents, err error) {
   198  	t1 := time.Now()
   199  	defer func() {
   200  		stats.ReadManifestLatency.SampleTimeSince(t1)
   201  	}()
   202  
   203  	return parseIfExistsWithParser(ctx, fm5.dir, fm5.parseManifest, readHook)
   204  }
   205  
   206  func (fm5 fileManifestV5) Update(ctx context.Context, lastLock addr, newContents manifestContents, stats *Stats, writeHook func() error) (mc manifestContents, err error) {
   207  	t1 := time.Now()
   208  	defer func() { stats.WriteManifestLatency.SampleTimeSince(t1) }()
   209  
   210  	checker := func(upstream, contents manifestContents) error {
   211  		if contents.gcGen != upstream.gcGen {
   212  			return chunks.ErrGCGenerationExpired
   213  		}
   214  		return nil
   215  	}
   216  
   217  	return updateWithParseWriterAndChecker(ctx, fm5.dir, fm5.writeManifest, fm5.parseManifest, checker, lastLock, newContents, writeHook)
   218  }
   219  
   220  func (fm5 fileManifestV5) UpdateGCGen(ctx context.Context, lastLock addr, newContents manifestContents, stats *Stats, writeHook func() error) (mc manifestContents, err error) {
   221  	t1 := time.Now()
   222  	defer func() { stats.WriteManifestLatency.SampleTimeSince(t1) }()
   223  
   224  	checker := func(upstream, contents manifestContents) error {
   225  		if contents.gcGen == upstream.gcGen {
   226  			return errors.New("UpdateGCGen() must update the garbage collection generation")
   227  		}
   228  		if contents.root != upstream.root {
   229  			return errors.New("UpdateGCGen() cannot update the root")
   230  		}
   231  		return nil
   232  	}
   233  
   234  	return updateWithParseWriterAndChecker(ctx, fm5.dir, fm5.writeManifest, fm5.parseManifest, checker, lastLock, newContents, writeHook)
   235  }
   236  
   237  func (fm5 fileManifestV5) parseManifest(r io.Reader) (manifestContents, error) {
   238  	manifest, err := ioutil.ReadAll(r)
   239  
   240  	if err != nil {
   241  		return manifestContents{}, err
   242  	}
   243  
   244  	slices := strings.Split(string(manifest), ":")
   245  	if len(slices) < prefixLen || len(slices)%2 != 1 {
   246  		return manifestContents{}, ErrCorruptManifest
   247  	}
   248  
   249  	if StorageVersion != string(slices[0]) {
   250  		return manifestContents{}, errors.New("invalid storage version")
   251  	}
   252  
   253  	specs, err := parseSpecs(slices[prefixLen:])
   254  	if err != nil {
   255  		return manifestContents{}, err
   256  	}
   257  
   258  	lock, err := parseAddr(slices[2])
   259  	if err != nil {
   260  		return manifestContents{}, err
   261  	}
   262  
   263  	gcGen, err := parseAddr(slices[4])
   264  	if err != nil {
   265  		return manifestContents{}, err
   266  	}
   267  
   268  	return manifestContents{
   269  		vers:  slices[1],
   270  		lock:  lock,
   271  		root:  hash.Parse(slices[3]),
   272  		gcGen: gcGen,
   273  		specs: specs,
   274  	}, nil
   275  }
   276  
   277  func (fm5 fileManifestV5) writeManifest(temp io.Writer, contents manifestContents) error {
   278  	strs := make([]string, 2*len(contents.specs)+prefixLen)
   279  	strs[0], strs[1], strs[2], strs[3], strs[4] = StorageVersion, contents.vers, contents.lock.String(), contents.root.String(), contents.gcGen.String()
   280  	tableInfo := strs[prefixLen:]
   281  	formatSpecs(contents.specs, tableInfo)
   282  	_, err := io.WriteString(temp, strings.Join(strs, ":"))
   283  
   284  	return err
   285  }
   286  
// fileManifestV4 is the previous version of the NomsBlockStore manifest.
// Compared to v5 it has no GC generation field. The format is as follows:
//
// |-- String --|-- String --|-------- String --------|-------- String --------|-- String --|- String --|...|-- String --|- String --|
// | nbs version:Noms version:Base32-encoded lock hash:Base32-encoded root hash:table 1 hash:table 1 cnt:...:table N hash:table N cnt|
type fileManifestV4 struct {
	// dir is the directory that holds the manifest and its lock file.
	dir string
}

// Compile-time check that fileManifestV4 satisfies manifestVersionGetter.
var _ manifestVersionGetter = &fileManifestV4{}
   297  
// Name returns the directory this manifest is stored in.
func (fm4 fileManifestV4) Name() string {
	return fm4.dir
}
   301  
   302  func (fm4 fileManifestV4) GetManifestVersion() string {
   303  	return "4"
   304  }
   305  
   306  func (fm4 fileManifestV4) ParseIfExists(ctx context.Context, stats *Stats, readHook func() error) (exists bool, contents manifestContents, err error) {
   307  	t1 := time.Now()
   308  	defer func() {
   309  		stats.ReadManifestLatency.SampleTimeSince(t1)
   310  	}()
   311  
   312  	return parseIfExistsWithParser(ctx, fm4.dir, fm4.parseManifest, readHook)
   313  }
   314  
   315  func (fm4 fileManifestV4) Update(ctx context.Context, lastLock addr, newContents manifestContents, stats *Stats, writeHook func() error) (mc manifestContents, err error) {
   316  	t1 := time.Now()
   317  	defer func() { stats.WriteManifestLatency.SampleTimeSince(t1) }()
   318  
   319  	noop := func(_, _ manifestContents) error {
   320  		return nil
   321  	}
   322  
   323  	return updateWithParseWriterAndChecker(ctx, fm4.dir, fm4.writeManifest, fm4.parseManifest, noop, lastLock, newContents, writeHook)
   324  }
   325  
   326  func (fm4 fileManifestV4) parseManifest(r io.Reader) (manifestContents, error) {
   327  	manifest, err := ioutil.ReadAll(r)
   328  
   329  	if err != nil {
   330  		return manifestContents{}, err
   331  	}
   332  
   333  	slices := strings.Split(string(manifest), ":")
   334  	if len(slices) < 4 || len(slices)%2 == 1 {
   335  		return manifestContents{}, ErrCorruptManifest
   336  	}
   337  
   338  	if storageVersion4 != string(slices[0]) {
   339  		return manifestContents{}, errors.New("invalid storage version")
   340  	}
   341  
   342  	specs, err := parseSpecs(slices[4:])
   343  
   344  	if err != nil {
   345  		return manifestContents{}, err
   346  	}
   347  
   348  	ad, err := parseAddr(slices[2])
   349  
   350  	if err != nil {
   351  		return manifestContents{}, err
   352  	}
   353  
   354  	return manifestContents{
   355  		vers:  slices[1],
   356  		lock:  ad,
   357  		root:  hash.Parse(slices[3]),
   358  		specs: specs,
   359  	}, nil
   360  }
   361  
   362  func (fm4 fileManifestV4) writeManifest(temp io.Writer, contents manifestContents) error {
   363  	strs := make([]string, 2*len(contents.specs)+4)
   364  	strs[0], strs[1], strs[2], strs[3] = storageVersion4, contents.vers, contents.lock.String(), contents.root.String()
   365  	tableInfo := strs[4:]
   366  	formatSpecs(contents.specs, tableInfo)
   367  	_, err := io.WriteString(temp, strings.Join(strs, ":"))
   368  
   369  	return err
   370  }
   371  
   372  func parseIfExistsWithParser(_ context.Context, dir string, parse manifestParser, readHook func() error) (exists bool, contents manifestContents, err error) {
   373  	var locked bool
   374  	locked, err = lockFileExists(dir)
   375  
   376  	if err != nil {
   377  		return false, manifestContents{}, err
   378  	}
   379  
   380  	// !exists(lockFileName) => uninitialized store
   381  	if locked {
   382  		var f *os.File
   383  		err = func() (ferr error) {
   384  			lck := newLock(dir)
   385  			ferr = lck.Lock()
   386  			if ferr != nil {
   387  				return ferr
   388  			}
   389  
   390  			defer func() {
   391  				unlockErr := lck.Unlock()
   392  				if ferr == nil {
   393  					ferr = unlockErr
   394  				}
   395  			}()
   396  
   397  			if readHook != nil {
   398  				ferr = readHook()
   399  
   400  				if ferr != nil {
   401  					return ferr
   402  				}
   403  			}
   404  
   405  			f, ferr = openIfExists(filepath.Join(dir, manifestFileName))
   406  			if ferr != nil {
   407  				return ferr
   408  			}
   409  			return nil
   410  		}()
   411  
   412  		if err != nil {
   413  			return exists, contents, err
   414  		}
   415  
   416  		if f != nil {
   417  			defer func() {
   418  				closeErr := f.Close()
   419  
   420  				if err == nil {
   421  					err = closeErr
   422  				}
   423  			}()
   424  
   425  			exists = true
   426  
   427  			contents, err = parse(f)
   428  
   429  			if err != nil {
   430  				return false, contents, err
   431  			}
   432  		}
   433  	}
   434  	return exists, contents, nil
   435  }
   436  
   437  func updateWithParseWriterAndChecker(_ context.Context, dir string, write manifestWriter, parse manifestParser, validate manifestChecker, lastLock addr, newContents manifestContents, writeHook func() error) (mc manifestContents, err error) {
   438  	var tempManifestPath string
   439  
   440  	// Write a temporary manifest file, to be renamed over manifestFileName upon success.
   441  	// The closure here ensures this file is closed before moving on.
   442  	tempManifestPath, err = func() (name string, ferr error) {
   443  		var temp *os.File
   444  		temp, ferr = tempfiles.MovableTempFileProvider.NewFile(dir, "nbs_manifest_")
   445  
   446  		if ferr != nil {
   447  			return "", ferr
   448  		}
   449  
   450  		defer func() {
   451  			closeErr := temp.Close()
   452  
   453  			if ferr == nil {
   454  				ferr = closeErr
   455  			}
   456  		}()
   457  
   458  		ferr = write(temp, newContents)
   459  
   460  		if ferr != nil {
   461  			return "", ferr
   462  		}
   463  
   464  		return temp.Name(), nil
   465  	}()
   466  
   467  	if err != nil {
   468  		return manifestContents{}, err
   469  	}
   470  
   471  	defer os.Remove(tempManifestPath) // If we rename below, this will be a no-op
   472  
   473  	// Take manifest file lock
   474  	lck := newLock(dir)
   475  	err = lck.Lock()
   476  
   477  	if err != nil {
   478  		return manifestContents{}, err
   479  	}
   480  
   481  	defer func() {
   482  		unlockErr := lck.Unlock()
   483  
   484  		if err == nil {
   485  			err = unlockErr
   486  		}
   487  	}()
   488  
   489  	// writeHook is for testing, allowing other code to slip in and try to do stuff while we hold the lock.
   490  	if writeHook != nil {
   491  		err = writeHook()
   492  
   493  		if err != nil {
   494  			return manifestContents{}, err
   495  		}
   496  	}
   497  
   498  	var upstream manifestContents
   499  	// Read current manifest (if it exists). The closure ensures that the file is closed before moving on, so we can rename over it later if need be.
   500  	manifestPath := filepath.Join(dir, manifestFileName)
   501  	upstream, err = func() (upstream manifestContents, ferr error) {
   502  		if f, ferr := openIfExists(manifestPath); ferr == nil && f != nil {
   503  			defer func() {
   504  				closeErr := f.Close()
   505  
   506  				if ferr == nil {
   507  					ferr = closeErr
   508  				}
   509  			}()
   510  
   511  			upstream, ferr = parse(f)
   512  
   513  			if ferr != nil {
   514  				return manifestContents{}, ferr
   515  			}
   516  
   517  			if newContents.vers != upstream.vers {
   518  				return manifestContents{}, errors.New("Update cannot change manifest version")
   519  			}
   520  
   521  			return upstream, nil
   522  		} else if ferr != nil {
   523  			return manifestContents{}, ferr
   524  		}
   525  
   526  		if lastLock != (addr{}) {
   527  			return manifestContents{}, errors.New("new manifest created with non 0 lock")
   528  		}
   529  
   530  		return manifestContents{}, nil
   531  	}()
   532  
   533  	if err != nil {
   534  		return manifestContents{}, err
   535  	}
   536  
   537  	if lastLock != upstream.lock {
   538  		return upstream, nil
   539  	}
   540  
   541  	// this is where we assert that gcGen is unchanged
   542  	err = validate(upstream, newContents)
   543  
   544  	if err != nil {
   545  		return manifestContents{}, err
   546  	}
   547  
   548  	err = os.Rename(tempManifestPath, manifestPath)
   549  	if err != nil {
   550  		// On Windows, renaming the temporary manifest file to the current manifest file overwrites the current file.
   551  		// This can occasionally cause an "ACCESS DENIED" error, aborting the entire operation. The cause is not clear,
   552  		// as it does not appear to be a dangling file handle (observed through Process Explorer). The error is also
   553  		// hard to reproduce and inconsistent, as it seems to occur between 200-7,000 renames, but averages around 1,500
   554  		// renames. This adds a delay before retrying the rename, increasing the wait time by a factor of 10 after each
   555  		// failure, up to a max of 10 seconds. If an error still occurs after that time, then we just fail. It is
   556  		// unknown if this is sufficient enough to completely eliminate the issue, however it has so far been able to
   557  		// succeed before reaching the retry limit.
   558  		for waitTime := time.Duration(1); err != nil && waitTime <= 10000; waitTime *= 10 {
   559  			time.Sleep(waitTime * time.Millisecond)
   560  			err = os.Rename(tempManifestPath, manifestPath)
   561  		}
   562  		if err != nil {
   563  			return manifestContents{}, err
   564  		}
   565  	}
   566  
   567  	return newContents, nil
   568  }