github.com/matrixorigin/matrixone@v1.2.0/pkg/fileservice/disk_cache.go

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package fileservice
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"fmt"
    21  	"io"
    22  	"io/fs"
    23  	"os"
    24  	"path/filepath"
    25  	"strings"
    26  	"sync"
    27  	"syscall"
    28  
    29  	"github.com/cespare/xxhash/v2"
    30  	"github.com/matrixorigin/matrixone/pkg/fileservice/fifocache"
    31  	"github.com/matrixorigin/matrixone/pkg/logutil"
    32  	"github.com/matrixorigin/matrixone/pkg/perfcounter"
    33  	metric "github.com/matrixorigin/matrixone/pkg/util/metric/v2"
    34  	"go.uber.org/zap"
    35  )
    36  
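         // DiskCache caches file contents as regular files under a local directory.
         // A capacity-bounded in-memory index (a fifocache.Cache keyed by disk path)
         // tracks the size of each cached file and deletes the underlying file when
         // an entry is evicted. updatingPaths is a condition variable over the set of
         // paths currently being written, used to serialize concurrent access to the
         // same cache file. DiskCache implements both IOVectorCache and FileCache.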
    37  type DiskCache struct {
    38  	path            string
    39  	perfCounterSets []*perfcounter.CounterSet
    40  
    41  	updatingPaths struct {
    42  		*sync.Cond
    43  		m map[string]bool
    44  	}
    45  
    46  	cache *fifocache.Cache[string, struct{}]
    47  }
    48  
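         // NewDiskCache returns a DiskCache rooted at path, creating the directory if
         // needed. capacity bounds the total size of the cached files (as reported by
         // fileSize, i.e. in bytes); when it is exceeded, evicted files are deleted
         // from disk and counted in the Disk.Evict perf counter. Cache files left by
         // a previous process are re-registered via loadCache.
         //
         // A minimal usage sketch (the directory and capacity here are illustrative):
         //
         //	cache, err := NewDiskCache(ctx, "/tmp/mo-disk-cache", 1<<30, nil)
         //	if err != nil {
         //		// handle error
         //	}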
    49  func NewDiskCache(
    50  	ctx context.Context,
    51  	path string,
    52  	capacity int,
    53  	perfCounterSets []*perfcounter.CounterSet,
    54  ) (ret *DiskCache, err error) {
    55  
    56  	err = os.MkdirAll(path, 0755)
    57  	if err != nil {
    58  		return nil, err
    59  	}
    60  
    61  	ret = &DiskCache{
    62  		path:            path,
    63  		perfCounterSets: perfCounterSets,
    64  
    65  		cache: fifocache.New(
    66  			capacity,
    67  			func(path string, _ struct{}) {
    68  				err := os.Remove(path)
    69  				if err == nil {
    70  					perfcounter.Update(ctx, func(set *perfcounter.CounterSet) {
    71  						set.FileService.Cache.Disk.Evict.Add(1)
    72  					}, perfCounterSets...)
    73  				}
    74  			},
    75  			func(key string) uint8 {
    76  				return uint8(xxhash.Sum64String(key))
    77  			},
    78  		),
    79  	}
    80  	ret.updatingPaths.Cond = sync.NewCond(new(sync.Mutex))
    81  	ret.updatingPaths.m = make(map[string]bool)
    82  
    83  	ret.loadCache()
    84  
    85  	return ret, nil
    86  }
    87  
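         // loadCache walks the cache directory and registers every file with
         // cacheFileSuffix in the in-memory index, so that files written by a
         // previous process still count toward capacity and remain evictable. Empty
         // sub-directories left over from the old cache layout are removed along the
         // way, and walk errors are ignored.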
    88  func (d *DiskCache) loadCache() {
    89  
    90  	_ = filepath.WalkDir(d.path, func(path string, entry os.DirEntry, err error) error {
    91  		if err != nil {
     92  			return nil // ignore
    93  		}
    94  		if entry.IsDir() {
     95  			// try to remove the directory: it only succeeds if empty, cleaning up the old cache layout
    96  			if path != d.path {
    97  				os.Remove(path)
    98  			}
    99  			return nil
   100  		}
   101  		if !strings.HasSuffix(entry.Name(), cacheFileSuffix) {
   102  			return nil
   103  		}
   104  		info, err := entry.Info()
   105  		if err != nil {
   106  			return nil // ignore
   107  		}
   108  
   109  		d.cache.Set(path, struct{}{}, int(fileSize(info)))
   110  
   111  		return nil
   112  	})
   113  
   114  }
   115  
   116  var _ IOVectorCache = new(DiskCache)
   117  
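         // Read serves vector's entries from the disk cache, unless the vector is
         // marked SkipDiskCacheReads. For each entry that is not yet done and has a
         // known size, it first tries the per-entry cache file, then a cached copy of
         // the full file, seeking to the entry's offset. On a hit the entry is filled
         // via ReadFromOSFile, marked done, and attributed to this cache; misses and
         // read errors are not fatal, leaving the entry to other caches or the
         // backing storage. Opened files are reused across entries and closed before
         // returning, and hit, read, open, and error counts are reported via perf
         // counters (hits also via metrics) when the call returns.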
   118  func (d *DiskCache) Read(
   119  	ctx context.Context,
   120  	vector *IOVector,
   121  ) (
   122  	err error,
   123  ) {
   124  
   125  	if vector.Policy.Any(SkipDiskCacheReads) {
   126  		return nil
   127  	}
   128  
   129  	var numHit, numRead, numOpenIOEntry, numOpenFull, numError int64
   130  	defer func() {
   131  		metric.FSReadHitDiskCounter.Add(float64(numHit))
   132  		perfcounter.Update(ctx, func(c *perfcounter.CounterSet) {
   133  			c.FileService.Cache.Read.Add(numRead)
   134  			c.FileService.Cache.Hit.Add(numHit)
   135  			c.FileService.Cache.Disk.Read.Add(numRead)
   136  			c.FileService.Cache.Disk.Hit.Add(numHit)
   137  			c.FileService.Cache.Disk.Error.Add(numError)
   138  			c.FileService.Cache.Disk.OpenIOEntryFile.Add(numOpenIOEntry)
   139  			c.FileService.Cache.Disk.OpenFullFile.Add(numOpenFull)
   140  		}, d.perfCounterSets...)
   141  	}()
   142  
   143  	path, err := ParsePath(vector.FilePath)
   144  	if err != nil {
   145  		return err
   146  	}
   147  
   148  	openedFiles := make(map[string]*os.File)
   149  	defer func() {
   150  		for _, file := range openedFiles {
   151  			_ = file.Close()
   152  		}
   153  	}()
   154  
   155  	fillEntry := func(entry *IOEntry) error {
   156  		if entry.done {
   157  			return nil
   158  		}
   159  		if entry.Size < 0 {
    160  			// ignore entries whose size is unknown
   161  			return nil
   162  		}
   163  
   164  		numRead++
   165  
   166  		var file *os.File
   167  
   168  		// entry file
   169  		diskPath := d.pathForIOEntry(path.File, *entry)
   170  		if f, ok := openedFiles[diskPath]; ok {
    171  			// reuse the opened entry file; it holds only this entry's bytes, so rewind to the start
    172  			_, err = f.Seek(0, io.SeekStart)
   173  			if err == nil {
   174  				file = f
   175  			}
   176  		} else {
   177  			// open file
   178  			d.waitUpdateComplete(diskPath)
   179  			diskFile, err := os.Open(diskPath)
   180  			if err == nil {
   181  				file = diskFile
   182  				defer func() {
   183  					openedFiles[diskPath] = diskFile
   184  				}()
   185  				numOpenIOEntry++
   186  			}
   187  		}
   188  
   189  		if file == nil {
   190  			// try full file
   191  			diskPath = d.pathForFile(path.File)
   192  			if f, ok := openedFiles[diskPath]; ok {
   193  				// use opened file
   194  				_, err = f.Seek(entry.Offset, io.SeekStart)
   195  				if err == nil {
   196  					file = f
   197  				}
   198  			} else {
   199  				// open file
   200  				d.waitUpdateComplete(diskPath)
   201  				diskFile, err := os.Open(diskPath)
   202  				if err == nil {
   203  					defer func() {
   204  						openedFiles[diskPath] = diskFile
   205  					}()
   206  					numOpenFull++
   207  					// seek
   208  					_, err = diskFile.Seek(entry.Offset, io.SeekStart)
   209  					if err == nil {
   210  						file = diskFile
   211  					}
   212  				}
   213  			}
   214  		}
   215  
   216  		if file == nil {
   217  			// no file available
   218  			return nil
   219  		}
   220  
   221  		if _, ok := d.cache.Get(diskPath); !ok {
   222  			// set cache
   223  			stat, err := file.Stat()
   224  			if err != nil {
   225  				return err
   226  			}
   227  			d.cache.Set(diskPath, struct{}{}, int(fileSize(stat)))
   228  		}
   229  
   230  		if err := entry.ReadFromOSFile(file); err != nil {
   231  			// ignore error
   232  			numError++
   233  			logutil.Warn("read disk cache error", zap.Any("error", err))
   234  			return nil
   235  		}
   236  
   237  		entry.done = true
   238  		entry.fromCache = d
   239  		numHit++
   240  
   241  		return nil
   242  	}
   243  
   244  	for i := range vector.Entries {
   245  		if err := fillEntry(&vector.Entries[i]); err != nil {
   246  			return err
   247  		}
   248  	}
   249  
   250  	return nil
   251  }
   252  
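         // Update writes the data of the vector's entries to per-entry cache files,
         // unless the vector is marked SkipDiskCacheWrites. Entries without data,
         // entries of unknown size, and entries that were just served from this cache
         // are skipped. Any OnWritten callbacks attached to the context through
         // CtxKeyDiskCacheCallbacks are invoked for each file that is actually
         // written.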
   253  func (d *DiskCache) Update(
   254  	ctx context.Context,
   255  	vector *IOVector,
   256  	async bool,
   257  ) (
   258  	err error,
   259  ) {
   260  
   261  	if vector.Policy.Any(SkipDiskCacheWrites) {
   262  		return nil
   263  	}
   264  
   265  	path, err := ParsePath(vector.FilePath)
   266  	if err != nil {
   267  		return err
   268  	}
   269  
   270  	// callback
   271  	var onWritten []OnDiskCacheWrittenFunc
   272  	if v := ctx.Value(CtxKeyDiskCacheCallbacks); v != nil {
   273  		onWritten = v.(*DiskCacheCallbacks).OnWritten
   274  	}
   275  
   276  	for _, entry := range vector.Entries {
   277  		if len(entry.Data) == 0 {
   278  			// no data
   279  			continue
   280  		}
   281  		if entry.Size < 0 {
    282  			// ignore entries whose size is unknown
   283  			continue
   284  		}
   285  		if entry.fromCache == d {
   286  			// no need to update
   287  			continue
   288  		}
   289  
   290  		diskPath := d.pathForIOEntry(path.File, entry)
   291  		written, err := d.writeFile(ctx, diskPath, func(context.Context) (io.ReadCloser, error) {
   292  			return io.NopCloser(bytes.NewReader(entry.Data)), nil
   293  		})
   294  		if err != nil {
   295  			return err
   296  		}
   297  		if written {
   298  			for _, fn := range onWritten {
   299  				fn(vector.FilePath, entry)
   300  			}
   301  		}
   302  
   303  	}
   304  
   305  	return nil
   306  }
   307  
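         // writeFile writes a single cache file at diskPath, reading its content from
         // openReader. Writes to the same path are serialized via startUpdate, and
         // nothing is written if the path is already in the index or on disk. The
         // write is atomic: data is copied into a temporary file in the target
         // directory using a pooled buffer, synced, closed, and then renamed into
         // place. The cache is best effort, so I/O errors are logged and swallowed;
         // the returned bool reports whether a new file was written.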
   308  func (d *DiskCache) writeFile(
   309  	ctx context.Context,
   310  	diskPath string,
   311  	openReader func(context.Context) (io.ReadCloser, error),
   312  ) (bool, error) {
   313  	var numCreate, numStat, numError, numWrite int64
   314  	defer func() {
   315  		perfcounter.Update(ctx, func(set *perfcounter.CounterSet) {
   316  			set.FileService.Cache.Disk.CreateFile.Add(numCreate)
   317  			set.FileService.Cache.Disk.StatFile.Add(numStat)
   318  			set.FileService.Cache.Disk.WriteFile.Add(numWrite)
   319  			set.FileService.Cache.Disk.Error.Add(numError)
    320  		}, d.perfCounterSets...)
   321  	}()
   322  
   323  	doneUpdate := d.startUpdate(diskPath)
   324  	defer doneUpdate()
   325  
   326  	if _, ok := d.cache.Get(diskPath); ok {
   327  		// already exists
   328  		return false, nil
   329  	}
   330  	stat, err := os.Stat(diskPath)
   331  	if err == nil {
   332  		// file exists
   333  		d.cache.Set(diskPath, struct{}{}, int(fileSize(stat)))
   334  		numStat++
   335  		return false, nil
   336  	}
   337  
   338  	// write data
   339  	dir := filepath.Dir(diskPath)
   340  	err = os.MkdirAll(dir, 0755)
   341  	if err != nil {
   342  		numError++
   343  		logutil.Warn("write disk cache error", zap.Any("error", err))
   344  		return false, nil // ignore error
   345  	}
   346  	f, err := os.CreateTemp(dir, "*")
   347  	if err != nil {
   348  		numError++
   349  		logutil.Warn("write disk cache error", zap.Any("error", err))
   350  		return false, nil // ignore error
   351  	}
   352  	numCreate++
   353  	from, err := openReader(ctx)
   354  	if err != nil {
   355  		numError++
   356  		logutil.Warn("write disk cache error", zap.Any("error", err))
   357  		return false, nil // ignore error
   358  	}
   359  	defer from.Close()
   360  	var buf []byte
   361  	put := ioBufferPool.Get(&buf)
   362  	defer put.Put()
   363  	_, err = io.CopyBuffer(f, from, buf)
   364  	if err != nil {
   365  		f.Close()
   366  		os.Remove(f.Name())
   367  		numError++
   368  		logutil.Warn("write disk cache error", zap.Any("error", err))
   369  		return false, nil // ignore error
   370  	}
   371  
   372  	if err := f.Sync(); err != nil {
   373  		numError++
   374  		logutil.Warn("write disk cache error", zap.Any("error", err))
   375  		return false, nil // ignore error
   376  	}
   377  
   378  	// set cache
   379  	stat, err = f.Stat()
   380  	if err != nil {
   381  		numError++
   382  		logutil.Warn("write disk cache error", zap.Any("error", err))
   383  		return false, nil // ignore error
   384  	}
   385  	d.cache.Set(diskPath, struct{}{}, int(fileSize(stat)))
   386  
   387  	if err := f.Close(); err != nil {
   388  		numError++
   389  		logutil.Warn("write disk cache error", zap.Any("error", err))
   390  		return false, nil // ignore error
   391  	}
   392  	if err := os.Rename(f.Name(), diskPath); err != nil {
   393  		numError++
   394  		logutil.Warn("write disk cache error", zap.Any("error", err))
   395  		return false, nil // ignore error
   396  	}
   397  	logutil.Debug("disk cache file written",
   398  		zap.Any("path", diskPath),
   399  	)
   400  
   401  	numWrite++
   402  
   403  	return true, nil
   404  }
   405  
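         // Flush is a no-op: writeFile syncs and renames each cache file as it is
         // written, so there is nothing buffered to flush here.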
   406  func (d *DiskCache) Flush() {
   407  }
   408  
   409  const cacheFileSuffix = ".mofscache"
   410  
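         // pathForIOEntry returns the cache file path for a single IOEntry, encoding
         // the entry's offset and size, the converted object path, and
         // cacheFileSuffix in the file name. Entries of unknown size must not be
         // cached and cause a panic.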
   411  func (d *DiskCache) pathForIOEntry(path string, entry IOEntry) string {
   412  	if entry.Size < 0 {
   413  		panic("should not cache size -1 entry")
   414  	}
   415  	return filepath.Join(
   416  		d.path,
   417  		fmt.Sprintf("%d-%d%s%s", entry.Offset, entry.Size, toOSPath(path), cacheFileSuffix),
   418  	)
   419  }
   420  
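         // pathForFile returns the cache file path holding a full copy of the object
         // at path, using the "full" prefix to distinguish it from per-entry files.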
   421  func (d *DiskCache) pathForFile(path string) string {
   422  	return filepath.Join(
   423  		d.path,
   424  		fmt.Sprintf("full%s%s", toOSPath(path), cacheFileSuffix),
   425  	)
   426  }
   427  
   428  var ErrNotCacheFile = errorStr("not a cache file")
   429  
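         // decodeFilePath maps a full-file cache path back to the original file
         // service path. It returns ErrNotCacheFile for paths that do not start with
         // the full-file prefix, which includes all per-entry cache files.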
   430  func (d *DiskCache) decodeFilePath(diskPath string) (string, error) {
   431  	path, err := filepath.Rel(d.path, diskPath)
   432  	if err != nil {
   433  		return "", err
   434  	}
   435  	if !strings.HasPrefix(path, "full") {
   436  		return "", ErrNotCacheFile
   437  	}
   438  	path = strings.TrimPrefix(path, "full")
   439  	path = strings.TrimSuffix(path, cacheFileSuffix)
   440  	return fromOSPath(path), nil
   441  }
   442  
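         // waitUpdateComplete blocks until no goroutine holds the update mark for
         // path (see startUpdate), so that reads do not race with an in-progress
         // write of the same cache file.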
   443  func (d *DiskCache) waitUpdateComplete(path string) {
   444  	d.updatingPaths.L.Lock()
   445  	for d.updatingPaths.m[path] {
   446  		d.updatingPaths.Wait()
   447  	}
   448  	d.updatingPaths.L.Unlock()
   449  }
   450  
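         // startUpdate marks path as being updated, first waiting for any goroutine
         // that is already updating it. The returned done function clears the mark
         // and wakes all waiters; callers must invoke it exactly once when the update
         // is finished.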
   451  func (d *DiskCache) startUpdate(path string) (done func()) {
   452  	d.updatingPaths.L.Lock()
   453  	for d.updatingPaths.m[path] {
   454  		d.updatingPaths.Wait()
   455  	}
   456  	d.updatingPaths.m[path] = true
   457  	d.updatingPaths.L.Unlock()
   458  	done = func() {
   459  		d.updatingPaths.L.Lock()
   460  		delete(d.updatingPaths.m, path)
   461  		d.updatingPaths.Broadcast()
   462  		d.updatingPaths.L.Unlock()
   463  	}
   464  	return
   465  }
   466  
   467  var _ FileCache = new(DiskCache)
   468  
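         // SetFile caches a full copy of the file at path, reading its content from
         // openReader via writeFile. Subsequent Reads can then serve arbitrary ranges
         // of that file from the local copy.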
   469  func (d *DiskCache) SetFile(
   470  	ctx context.Context,
   471  	path string,
   472  	openReader func(context.Context) (io.ReadCloser, error),
   473  ) error {
   474  	diskPath := d.pathForFile(path)
   475  	_, err := d.writeFile(ctx, diskPath, openReader)
   476  	if err != nil {
   477  		return err
   478  	}
   479  	return nil
   480  }
   481  
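         // DeletePaths removes the full-file cache entries for the given paths, both
         // from disk and from the in-memory index; a missing file is not an error.
         // Per-entry cache files are not removed yet (see the TODO in the body), and
         // each path stays marked as updating until the function returns.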
   482  func (d *DiskCache) DeletePaths(
   483  	ctx context.Context,
   484  	paths []string,
   485  ) error {
   486  
   487  	for _, path := range paths {
   488  		diskPath := d.pathForFile(path)
    489  		// TODO: also delete IOEntry files
   490  
   491  		doneUpdate := d.startUpdate(diskPath)
   492  		defer doneUpdate()
   493  
   494  		if err := os.Remove(diskPath); err != nil {
   495  			if !os.IsNotExist(err) {
   496  				return err
   497  			}
   498  		}
   499  		d.cache.Delete(diskPath)
   500  	}
   501  
   502  	return nil
   503  }
   504  
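         // fileSize returns the on-disk footprint of a file: the number of allocated
         // 512-byte blocks when the platform exposes it, falling back to the logical
         // size otherwise. Using the allocated size keeps capacity accounting closer
         // to actual disk usage.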
   505  func fileSize(info fs.FileInfo) int64 {
   506  	if sys, ok := info.Sys().(*syscall.Stat_t); ok {
    507  		return int64(sys.Blocks) * 512 // st_blocks counts 512-byte units, regardless of sys.Blksize
   508  	}
   509  	return info.Size()
   510  }