github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/nbs/file_table_persister.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // This file incorporates work covered by the following copyright and
    16  // permission notice:
    17  //
    18  // Copyright 2016 Attic Labs, Inc. All rights reserved.
    19  // Licensed under the Apache License, version 2.0:
    20  // http://www.apache.org/licenses/LICENSE-2.0
    21  
    22  package nbs
    23  
    24  import (
    25  	"bytes"
    26  	"context"
    27  	"errors"
    28  	"io"
    29  	"io/fs"
    30  	"os"
    31  	"path"
    32  	"path/filepath"
    33  	"strings"
    34  	"sync"
    35  	"time"
    36  
    37  	"github.com/dolthub/dolt/go/libraries/utils/file"
    38  	"github.com/dolthub/dolt/go/store/chunks"
    39  	"github.com/dolthub/dolt/go/store/hash"
    40  	"github.com/dolthub/dolt/go/store/util/tempfiles"
    41  )
    42  
    43  const tempTablePrefix = "nbs_table_"
    44  
    45  func newFSTablePersister(dir string, q MemoryQuotaProvider) tablePersister {
    46  	return &fsTablePersister{dir, q, sync.Mutex{}, nil, make(map[string]struct{})}
    47  }
    48  
// fsTablePersister is a tablePersister / tableFilePersister that keeps table
// files as regular files inside a single directory.
type fsTablePersister struct {
	// dir is the directory that holds the table files and in-flight temp files.
	dir string
	// q is passed to newFileTableReader when a table file is opened.
	q   MemoryQuotaProvider

	// Protects the following two maps.
	removeMu sync.Mutex
	// While we are running PruneTableFiles, any newly created table files are
	// added to this map. The file delete loop will never delete anything which
	// appears in this map. Files should be added to this map before they are
	// written. The map is nil whenever no prune is in progress.
	toKeep map[string]struct{}
	// Any temp files we are currently writing are always present in this map.
	// The lock should be taken before we generate the new temp file, and the
	// new temp file should be added to this map. Care should be taken to always
	// remove the entry from this map when we are done processing the temp file
	// or else this map will grow without bound.
	curTmps map[string]struct{}
}
    67  
    68  var _ tablePersister = &fsTablePersister{}
    69  var _ tableFilePersister = &fsTablePersister{}
    70  
// Open returns a chunkSource for the table file called name in ftp.dir by
// delegating to newFileTableReader with the persister's memory quota provider.
// stats is not used by this implementation.
func (ftp *fsTablePersister) Open(ctx context.Context, name hash.Hash, chunkCount uint32, stats *Stats) (chunkSource, error) {
	return newFileTableReader(ctx, ftp.dir, name, chunkCount, ftp.q)
}
    74  
    75  func (ftp *fsTablePersister) Exists(ctx context.Context, name hash.Hash, chunkCount uint32, stats *Stats) (bool, error) {
    76  	ftp.removeMu.Lock()
    77  	defer ftp.removeMu.Unlock()
    78  	if ftp.toKeep != nil {
    79  		ftp.toKeep[filepath.Join(ftp.dir, name.String())] = struct{}{}
    80  	}
    81  	return tableFileExists(ctx, ftp.dir, name)
    82  }
    83  
    84  func (ftp *fsTablePersister) Persist(ctx context.Context, mt *memTable, haver chunkReader, stats *Stats) (chunkSource, error) {
    85  	t1 := time.Now()
    86  	defer stats.PersistLatency.SampleTimeSince(t1)
    87  
    88  	name, data, chunkCount, err := mt.write(haver, stats)
    89  	if err != nil {
    90  		return emptyChunkSource{}, err
    91  	}
    92  
    93  	return ftp.persistTable(ctx, name, data, chunkCount, stats)
    94  }
    95  
// Path returns the directory in which this persister stores its table files.
func (ftp *fsTablePersister) Path() string {
	return ftp.dir
}
    99  
   100  func (ftp *fsTablePersister) CopyTableFile(ctx context.Context, r io.Reader, fileId string, fileSz uint64, chunkCount uint32) error {
   101  	tn, f, err := func() (n string, cleanup func(), err error) {
   102  		ftp.removeMu.Lock()
   103  		var temp *os.File
   104  		temp, err = tempfiles.MovableTempFileProvider.NewFile(ftp.dir, tempTablePrefix)
   105  		if err != nil {
   106  			ftp.removeMu.Unlock()
   107  			return "", func() {}, err
   108  		}
   109  		ftp.curTmps[filepath.Clean(temp.Name())] = struct{}{}
   110  		ftp.removeMu.Unlock()
   111  
   112  		cleanup = func() {
   113  			ftp.removeMu.Lock()
   114  			delete(ftp.curTmps, filepath.Clean(temp.Name()))
   115  			ftp.removeMu.Unlock()
   116  		}
   117  
   118  		defer func() {
   119  			cerr := temp.Close()
   120  			if err == nil {
   121  				err = cerr
   122  			}
   123  		}()
   124  
   125  		_, err = io.Copy(temp, r)
   126  		if err != nil {
   127  			return "", cleanup, err
   128  		}
   129  
   130  		err = temp.Sync()
   131  		if err != nil {
   132  			return "", cleanup, err
   133  		}
   134  
   135  		return temp.Name(), cleanup, nil
   136  	}()
   137  	defer f()
   138  	if err != nil {
   139  		return err
   140  	}
   141  
   142  	path := filepath.Join(ftp.dir, fileId)
   143  	ftp.removeMu.Lock()
   144  	if ftp.toKeep != nil {
   145  		ftp.toKeep[filepath.Clean(path)] = struct{}{}
   146  	}
   147  	defer ftp.removeMu.Unlock()
   148  	return file.Rename(tn, path)
   149  }
   150  
   151  func (ftp *fsTablePersister) TryMoveCmpChunkTableWriter(ctx context.Context, filename string, w *CmpChunkTableWriter) error {
   152  	path := filepath.Join(ftp.dir, filename)
   153  	ftp.removeMu.Lock()
   154  	if ftp.toKeep != nil {
   155  		ftp.toKeep[filepath.Clean(path)] = struct{}{}
   156  	}
   157  	defer ftp.removeMu.Unlock()
   158  	return w.FlushToFile(path)
   159  }
   160  
   161  func (ftp *fsTablePersister) persistTable(ctx context.Context, name hash.Hash, data []byte, chunkCount uint32, stats *Stats) (cs chunkSource, err error) {
   162  	if chunkCount == 0 {
   163  		return emptyChunkSource{}, nil
   164  	}
   165  
   166  	tempName, f, err := func() (tempName string, cleanup func(), ferr error) {
   167  		ftp.removeMu.Lock()
   168  		var temp *os.File
   169  		temp, ferr = tempfiles.MovableTempFileProvider.NewFile(ftp.dir, tempTablePrefix)
   170  		if ferr != nil {
   171  			ftp.removeMu.Unlock()
   172  			return "", func() {}, ferr
   173  		}
   174  		ftp.curTmps[filepath.Clean(temp.Name())] = struct{}{}
   175  		ftp.removeMu.Unlock()
   176  
   177  		cleanup = func() {
   178  			ftp.removeMu.Lock()
   179  			delete(ftp.curTmps, filepath.Clean(temp.Name()))
   180  			ftp.removeMu.Unlock()
   181  		}
   182  
   183  		defer func() {
   184  			closeErr := temp.Close()
   185  			if ferr == nil {
   186  				ferr = closeErr
   187  			}
   188  		}()
   189  
   190  		_, ferr = io.Copy(temp, bytes.NewReader(data))
   191  		if ferr != nil {
   192  			return "", cleanup, ferr
   193  		}
   194  
   195  		ferr = temp.Sync()
   196  		if ferr != nil {
   197  			return "", cleanup, ferr
   198  		}
   199  
   200  		return temp.Name(), cleanup, nil
   201  	}()
   202  	defer f()
   203  	if err != nil {
   204  		return nil, err
   205  	}
   206  
   207  	newName := filepath.Join(ftp.dir, name.String())
   208  	ftp.removeMu.Lock()
   209  	if ftp.toKeep != nil {
   210  		ftp.toKeep[filepath.Clean(newName)] = struct{}{}
   211  	}
   212  	err = file.Rename(tempName, newName)
   213  	ftp.removeMu.Unlock()
   214  	if err != nil {
   215  		return nil, err
   216  	}
   217  
   218  	return ftp.Open(ctx, name, chunkCount, stats)
   219  }
   220  
   221  func (ftp *fsTablePersister) ConjoinAll(ctx context.Context, sources chunkSources, stats *Stats) (chunkSource, cleanupFunc, error) {
   222  	plan, err := planRangeCopyConjoin(sources, stats)
   223  	if err != nil {
   224  		return emptyChunkSource{}, nil, err
   225  	}
   226  
   227  	if plan.chunkCount == 0 {
   228  		return emptyChunkSource{}, func() {}, nil
   229  	}
   230  
   231  	name := nameFromSuffixes(plan.suffixes())
   232  	tempName, f, err := func() (tempName string, cleanup func(), ferr error) {
   233  		ftp.removeMu.Lock()
   234  		var temp *os.File
   235  		temp, ferr = tempfiles.MovableTempFileProvider.NewFile(ftp.dir, tempTablePrefix)
   236  		if ferr != nil {
   237  			ftp.removeMu.Unlock()
   238  			return "", func() {}, ferr
   239  		}
   240  		ftp.curTmps[filepath.Clean(temp.Name())] = struct{}{}
   241  		ftp.removeMu.Unlock()
   242  
   243  		cleanup = func() {
   244  			ftp.removeMu.Lock()
   245  			delete(ftp.curTmps, filepath.Clean(temp.Name()))
   246  			ftp.removeMu.Unlock()
   247  		}
   248  
   249  		defer func() {
   250  			closeErr := temp.Close()
   251  			if ferr == nil {
   252  				ferr = closeErr
   253  			}
   254  		}()
   255  
   256  		for _, sws := range plan.sources.sws {
   257  			var r io.ReadCloser
   258  			r, _, ferr = sws.source.reader(ctx)
   259  			if ferr != nil {
   260  				return "", cleanup, ferr
   261  			}
   262  
   263  			n, ferr := io.CopyN(temp, r, int64(sws.dataLen))
   264  			if ferr != nil {
   265  				r.Close()
   266  				return "", cleanup, ferr
   267  			}
   268  
   269  			if uint64(n) != sws.dataLen {
   270  				r.Close()
   271  				return "", cleanup, errors.New("failed to copy all data")
   272  			}
   273  
   274  			err := r.Close()
   275  			if err != nil {
   276  				return "", cleanup, err
   277  			}
   278  		}
   279  
   280  		_, ferr = temp.Write(plan.mergedIndex)
   281  
   282  		if ferr != nil {
   283  			return "", cleanup, ferr
   284  		}
   285  
   286  		ferr = temp.Sync()
   287  		if ferr != nil {
   288  			return "", cleanup, ferr
   289  		}
   290  
   291  		return temp.Name(), cleanup, nil
   292  	}()
   293  	defer f()
   294  	if err != nil {
   295  		return nil, nil, err
   296  	}
   297  
   298  	path := filepath.Join(ftp.dir, name.String())
   299  	ftp.removeMu.Lock()
   300  	if ftp.toKeep != nil {
   301  		ftp.toKeep[filepath.Clean(path)] = struct{}{}
   302  	}
   303  	err = file.Rename(tempName, path)
   304  	if err != nil {
   305  		return nil, nil, err
   306  	}
   307  	ftp.removeMu.Unlock()
   308  
   309  	cs, err := ftp.Open(ctx, name, plan.chunkCount, stats)
   310  	if err != nil {
   311  		return nil, nil, err
   312  	}
   313  	return cs, func() {
   314  		for _, s := range sources {
   315  			file.Remove(filepath.Join(ftp.dir, s.hash().String()))
   316  		}
   317  	}, nil
   318  }
   319  
   320  func (ftp *fsTablePersister) PruneTableFiles(ctx context.Context, keeper func() []hash.Hash, mtime time.Time) error {
   321  	ftp.removeMu.Lock()
   322  	if ftp.toKeep != nil {
   323  		ftp.removeMu.Unlock()
   324  		return errors.New("shallow gc already in progress")
   325  	}
   326  	ftp.toKeep = make(map[string]struct{})
   327  	ftp.removeMu.Unlock()
   328  
   329  	defer func() {
   330  		ftp.removeMu.Lock()
   331  		ftp.toKeep = nil
   332  		ftp.removeMu.Unlock()
   333  	}()
   334  
   335  	toKeep := make(map[string]struct{})
   336  	for _, k := range keeper() {
   337  		toKeep[filepath.Clean(filepath.Join(ftp.dir, k.String()))] = struct{}{}
   338  	}
   339  
   340  	ftp.removeMu.Lock()
   341  	for f := range toKeep {
   342  		ftp.toKeep[f] = struct{}{}
   343  	}
   344  	ftp.removeMu.Unlock()
   345  
   346  	fileInfos, err := os.ReadDir(ftp.dir)
   347  	if err != nil {
   348  		return err
   349  	}
   350  
   351  	ea := make(gcErrAccum)
   352  
   353  	unfilteredTableFiles := make([]string, 0)
   354  	unfilteredTempFiles := make([]string, 0)
   355  
   356  	for _, info := range fileInfos {
   357  		if info.IsDir() {
   358  			continue
   359  		}
   360  
   361  		filePath := path.Join(ftp.dir, info.Name())
   362  
   363  		if strings.HasPrefix(info.Name(), tempTablePrefix) {
   364  			unfilteredTempFiles = append(unfilteredTempFiles, filePath)
   365  			continue
   366  		}
   367  
   368  		if len(info.Name()) != 32 {
   369  			continue // not a table file
   370  		}
   371  
   372  		if _, ok := hash.MaybeParse(info.Name()); !ok {
   373  			continue // not a table file
   374  		}
   375  
   376  		i, err := info.Info()
   377  		if err != nil {
   378  			ea.add(filePath, err)
   379  			continue
   380  		}
   381  
   382  		ctime := i.ModTime()
   383  		if ctime.After(mtime) {
   384  			continue // file has been updated more recently than our cutoff time
   385  		}
   386  
   387  		unfilteredTableFiles = append(unfilteredTableFiles, filePath)
   388  	}
   389  
   390  	for _, p := range unfilteredTempFiles {
   391  		ftp.removeMu.Lock()
   392  		if _, ok := ftp.curTmps[filepath.Clean(p)]; !ok {
   393  			err := file.Remove(p)
   394  			if err != nil && !errors.Is(err, fs.ErrNotExist) {
   395  				ea.add(p, err)
   396  			}
   397  		}
   398  		ftp.removeMu.Unlock()
   399  	}
   400  
   401  	for _, p := range unfilteredTableFiles {
   402  		ftp.removeMu.Lock()
   403  		if _, ok := ftp.toKeep[filepath.Clean(p)]; !ok {
   404  			err := file.Remove(p)
   405  			if err != nil && !errors.Is(err, fs.ErrNotExist) {
   406  				ea.add(p, err)
   407  			}
   408  		}
   409  		ftp.removeMu.Unlock()
   410  	}
   411  
   412  	if !ea.isEmpty() {
   413  		return ea
   414  	}
   415  
   416  	return nil
   417  }
   418  
// Close is a no-op for the filesystem persister and always returns nil.
func (ftp *fsTablePersister) Close() error {
	return nil
}
   422  
// AccessMode always reports chunks.ExclusiveAccessMode_Shared.
func (ftp *fsTablePersister) AccessMode() chunks.ExclusiveAccessMode {
	return chunks.ExclusiveAccessMode_Shared
}