github.com/matrixorigin/matrixone@v1.2.0/pkg/fileservice/local_etl_fs.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package fileservice
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"io"
    21  	"os"
    22  	pathpkg "path"
    23  	"path/filepath"
    24  	"sort"
    25  	"strings"
    26  	"sync"
    27  	"sync/atomic"
    28  
    29  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    30  )
    31  
    32  // LocalETLFS is a FileService implementation backed by local file system and suitable for ETL operations
    33  type LocalETLFS struct {
    34  	name     string
    35  	rootPath string
    36  
    37  	sync.RWMutex
    38  	dirFiles map[string]*os.File
    39  }
    40  
    41  var _ FileService = new(LocalETLFS)
    42  
    43  func NewLocalETLFS(name string, rootPath string) (*LocalETLFS, error) {
    44  
    45  	// get absolute path
    46  	if rootPath != "" {
    47  		var err error
    48  		rootPath, err = filepath.Abs(rootPath)
    49  		if err != nil {
    50  			return nil, err
    51  		}
    52  
    53  		// ensure dir
    54  		f, err := os.Open(rootPath)
    55  		if os.IsNotExist(err) {
    56  			// not exists, create
    57  			err := os.MkdirAll(rootPath, 0755)
    58  			if err != nil {
    59  				return nil, err
    60  			}
    61  
    62  		} else if err != nil {
    63  			// stat error
    64  			return nil, err
    65  
    66  		} else {
    67  			defer f.Close()
    68  		}
    69  
    70  	}
    71  
    72  	return &LocalETLFS{
    73  		name:     name,
    74  		rootPath: rootPath,
    75  		dirFiles: make(map[string]*os.File),
    76  	}, nil
    77  }
    78  
    79  func (l *LocalETLFS) Name() string {
    80  	return l.name
    81  }
    82  
    83  func (l *LocalETLFS) Close() {
    84  }
    85  
    86  func (l *LocalETLFS) Write(ctx context.Context, vector IOVector) error {
    87  	select {
    88  	case <-ctx.Done():
    89  		return ctx.Err()
    90  	default:
    91  	}
    92  
    93  	path, err := ParsePathAtService(vector.FilePath, l.name)
    94  	if err != nil {
    95  		return err
    96  	}
    97  	nativePath := l.toNativeFilePath(path.File)
    98  
    99  	// check existence
   100  	_, err = os.Stat(nativePath)
   101  	if err == nil {
   102  		// existed
   103  		return moerr.NewFileAlreadyExistsNoCtx(path.File)
   104  	}
   105  
   106  	return l.write(ctx, vector)
   107  }
   108  
   109  func (l *LocalETLFS) write(ctx context.Context, vector IOVector) error {
   110  	path, err := ParsePathAtService(vector.FilePath, l.name)
   111  	if err != nil {
   112  		return err
   113  	}
   114  	nativePath := l.toNativeFilePath(path.File)
   115  
   116  	// sort
   117  	sort.Slice(vector.Entries, func(i, j int) bool {
   118  		return vector.Entries[i].Offset < vector.Entries[j].Offset
   119  	})
   120  
   121  	// size
   122  	var size int64
   123  	if len(vector.Entries) > 0 {
   124  		last := vector.Entries[len(vector.Entries)-1]
   125  		size = int64(last.Offset + last.Size)
   126  	}
   127  
   128  	r := newIOEntriesReader(ctx, vector.Entries)
   129  
   130  	// write
   131  	f, err := os.CreateTemp(
   132  		l.rootPath,
   133  		".tmp.*",
   134  	)
   135  	if err != nil {
   136  		return err
   137  	}
   138  	var buf []byte
   139  	put := ioBufferPool.Get(&buf)
   140  	defer put.Put()
   141  	n, err := io.CopyBuffer(f, r, buf)
   142  	if err != nil {
   143  		return err
   144  	}
   145  	if n != size {
   146  		sizeUnknown := false
   147  		for _, entry := range vector.Entries {
   148  			if entry.Size < 0 {
   149  				sizeUnknown = true
   150  				break
   151  			}
   152  		}
   153  		if !sizeUnknown {
   154  			return moerr.NewSizeNotMatchNoCtx(path.File)
   155  		}
   156  	}
   157  	if err := f.Close(); err != nil {
   158  		return err
   159  	}
   160  
   161  	// ensure parent dir
   162  	parentDir, _ := filepath.Split(nativePath)
   163  	err = l.ensureDir(parentDir)
   164  	if err != nil {
   165  		return err
   166  	}
   167  
   168  	// move
   169  	if err := os.Rename(f.Name(), nativePath); err != nil {
   170  		return err
   171  	}
   172  
   173  	if err := l.syncDir(parentDir); err != nil {
   174  		return err
   175  	}
   176  
   177  	return nil
   178  }
   179  
   180  func (l *LocalETLFS) Read(ctx context.Context, vector *IOVector) error {
   181  	select {
   182  	case <-ctx.Done():
   183  		return ctx.Err()
   184  	default:
   185  	}
   186  
   187  	if len(vector.Entries) == 0 {
   188  		return moerr.NewEmptyVectorNoCtx()
   189  	}
   190  
   191  	path, err := ParsePathAtService(vector.FilePath, l.name)
   192  	if err != nil {
   193  		return err
   194  	}
   195  	nativePath := l.toNativeFilePath(path.File)
   196  
   197  	_, err = os.Stat(nativePath)
   198  	if os.IsNotExist(err) {
   199  		return moerr.NewFileNotFoundNoCtx(path.File)
   200  	}
   201  	if err != nil {
   202  		return err
   203  	}
   204  
   205  	for i, entry := range vector.Entries {
   206  		if entry.Size == 0 {
   207  			return moerr.NewEmptyRangeNoCtx(path.File)
   208  		}
   209  
   210  		if entry.done {
   211  			continue
   212  		}
   213  
   214  		if entry.WriterForRead != nil {
   215  			f, err := os.Open(nativePath)
   216  			if os.IsNotExist(err) {
   217  				return moerr.NewFileNotFoundNoCtx(path.File)
   218  			}
   219  			if err != nil {
   220  				return err
   221  			}
   222  			defer f.Close()
   223  			if entry.Offset > 0 {
   224  				if _, err := f.Seek(int64(entry.Offset), io.SeekStart); err != nil {
   225  					return err
   226  				}
   227  			}
   228  			r := (io.Reader)(f)
   229  			if entry.Size > 0 {
   230  				r = io.LimitReader(r, int64(entry.Size))
   231  			}
   232  
   233  			if entry.ToCacheData != nil {
   234  				r = io.TeeReader(r, entry.WriterForRead)
   235  				counter := new(atomic.Int64)
   236  				cr := &countingReader{
   237  					R: r,
   238  					C: counter,
   239  				}
   240  				cacheData, err := entry.ToCacheData(cr, nil, DefaultCacheDataAllocator)
   241  				if err != nil {
   242  					return err
   243  				}
   244  				vector.Entries[i].CachedData = cacheData
   245  				if entry.Size > 0 && counter.Load() != entry.Size {
   246  					return moerr.NewUnexpectedEOFNoCtx(path.File)
   247  				}
   248  
   249  			} else {
   250  				var buf []byte
   251  				put := ioBufferPool.Get(&buf)
   252  				defer put.Put()
   253  				n, err := io.CopyBuffer(entry.WriterForRead, r, buf)
   254  				if err != nil {
   255  					return err
   256  				}
   257  				if entry.Size > 0 && n != int64(entry.Size) {
   258  					return moerr.NewUnexpectedEOFNoCtx(path.File)
   259  				}
   260  			}
   261  
   262  		} else if entry.ReadCloserForRead != nil {
   263  			f, err := os.Open(nativePath)
   264  			if os.IsNotExist(err) {
   265  				return moerr.NewFileNotFoundNoCtx(path.File)
   266  			}
   267  			if err != nil {
   268  				return err
   269  			}
   270  			if entry.Offset > 0 {
   271  				if _, err := f.Seek(int64(entry.Offset), io.SeekStart); err != nil {
   272  					return err
   273  				}
   274  			}
   275  			r := (io.Reader)(f)
   276  			if entry.Size > 0 {
   277  				r = io.LimitReader(r, int64(entry.Size))
   278  			}
   279  			if entry.ToCacheData == nil {
   280  				*entry.ReadCloserForRead = &readCloser{
   281  					r:         r,
   282  					closeFunc: f.Close,
   283  				}
   284  			} else {
   285  				buf := new(bytes.Buffer)
   286  				*entry.ReadCloserForRead = &readCloser{
   287  					r: io.TeeReader(r, buf),
   288  					closeFunc: func() error {
   289  						defer f.Close()
   290  						cacheData, err := entry.ToCacheData(buf, buf.Bytes(), DefaultCacheDataAllocator)
   291  						if err != nil {
   292  							return err
   293  						}
   294  						vector.Entries[i].CachedData = cacheData
   295  						return nil
   296  					},
   297  				}
   298  			}
   299  
   300  		} else {
   301  			f, err := os.Open(nativePath)
   302  			if os.IsNotExist(err) {
   303  				return moerr.NewFileNotFoundNoCtx(path.File)
   304  			}
   305  			if err != nil {
   306  				return err
   307  			}
   308  			defer f.Close()
   309  
   310  			if entry.Offset > 0 {
   311  				_, err = f.Seek(int64(entry.Offset), io.SeekStart)
   312  				if err != nil {
   313  					return err
   314  				}
   315  			}
   316  			r := (io.Reader)(f)
   317  			if entry.Size > 0 {
   318  				r = io.LimitReader(r, int64(entry.Size))
   319  			}
   320  
   321  			if entry.Size < 0 {
   322  				data, err := io.ReadAll(r)
   323  				if err != nil {
   324  					return err
   325  				}
   326  				entry.Data = data
   327  				entry.Size = int64(len(data))
   328  
   329  			} else {
   330  				if int64(len(entry.Data)) < entry.Size {
   331  					entry.Data = make([]byte, entry.Size)
   332  				}
   333  				n, err := io.ReadFull(r, entry.Data)
   334  				if err != nil {
   335  					return err
   336  				}
   337  				if int64(n) != entry.Size {
   338  					return moerr.NewUnexpectedEOFNoCtx(path.File)
   339  				}
   340  			}
   341  
   342  			if err := entry.setCachedData(); err != nil {
   343  				return err
   344  			}
   345  
   346  			vector.Entries[i] = entry
   347  		}
   348  
   349  	}
   350  
   351  	return nil
   352  
   353  }
   354  
   355  func (l *LocalETLFS) ReadCache(ctx context.Context, vector *IOVector) error {
   356  	return nil
   357  }
   358  
   359  func (l *LocalETLFS) StatFile(ctx context.Context, filePath string) (*DirEntry, error) {
   360  	select {
   361  	case <-ctx.Done():
   362  		return nil, ctx.Err()
   363  	default:
   364  	}
   365  
   366  	path, err := ParsePathAtService(filePath, l.name)
   367  	if err != nil {
   368  		return nil, err
   369  	}
   370  	nativePath := l.toNativeFilePath(path.File)
   371  
   372  	stat, err := os.Stat(nativePath)
   373  	if os.IsNotExist(err) {
   374  		return nil, moerr.NewFileNotFoundNoCtx(path.File)
   375  	}
   376  	if err != nil {
   377  		return nil, err
   378  	}
   379  
   380  	if stat.IsDir() {
   381  		return nil, moerr.NewFileNotFoundNoCtx(path.File)
   382  	}
   383  
   384  	return &DirEntry{
   385  		Name:  pathpkg.Base(filePath),
   386  		IsDir: false,
   387  		Size:  stat.Size(),
   388  	}, nil
   389  }
   390  
   391  func (l *LocalETLFS) PrefetchFile(ctx context.Context, filePath string) error {
   392  	return nil
   393  }
   394  
   395  func (l *LocalETLFS) List(ctx context.Context, dirPath string) (ret []DirEntry, err error) {
   396  	select {
   397  	case <-ctx.Done():
   398  		return nil, ctx.Err()
   399  	default:
   400  	}
   401  
   402  	path, err := ParsePathAtService(dirPath, l.name)
   403  	if err != nil {
   404  		return nil, err
   405  	}
   406  	nativePath := l.toNativeFilePath(path.File)
   407  
   408  	f, err := os.Open(nativePath)
   409  	if os.IsNotExist(err) {
   410  		err = nil
   411  		return
   412  	}
   413  	if err != nil {
   414  		return nil, err
   415  	}
   416  	defer f.Close()
   417  
   418  	entries, err := f.ReadDir(-1)
   419  	for _, entry := range entries {
   420  		name := entry.Name()
   421  		if strings.HasPrefix(name, ".") {
   422  			continue
   423  		}
   424  		info, err := entry.Info()
   425  		if err != nil {
   426  			return nil, err
   427  		}
   428  		isDir, err := entryIsDir(nativePath, name, info)
   429  		if err != nil {
   430  			return nil, err
   431  		}
   432  		ret = append(ret, DirEntry{
   433  			Name:  name,
   434  			IsDir: isDir,
   435  			Size:  info.Size(),
   436  		})
   437  	}
   438  
   439  	sort.Slice(ret, func(i, j int) bool {
   440  		return ret[i].Name < ret[j].Name
   441  	})
   442  
   443  	if err != nil {
   444  		return ret, err
   445  	}
   446  
   447  	return
   448  }
   449  
   450  func (l *LocalETLFS) Delete(ctx context.Context, filePaths ...string) error {
   451  	select {
   452  	case <-ctx.Done():
   453  		return ctx.Err()
   454  	default:
   455  	}
   456  
   457  	for _, filePath := range filePaths {
   458  		if err := l.deleteSingle(ctx, filePath); err != nil {
   459  			return err
   460  		}
   461  	}
   462  	return nil
   463  }
   464  
   465  func (l *LocalETLFS) deleteSingle(ctx context.Context, filePath string) error {
   466  	path, err := ParsePathAtService(filePath, l.name)
   467  	if err != nil {
   468  		return err
   469  	}
   470  	nativePath := l.toNativeFilePath(path.File)
   471  
   472  	_, err = os.Stat(nativePath)
   473  	if err != nil {
   474  		if os.IsNotExist(err) {
   475  			// ignore not found error
   476  			return nil
   477  		}
   478  		return err
   479  	}
   480  
   481  	err = os.Remove(nativePath)
   482  	if err != nil {
   483  		return err
   484  	}
   485  
   486  	parentDir, _ := filepath.Split(nativePath)
   487  	err = l.syncDir(parentDir)
   488  	if err != nil {
   489  		return err
   490  	}
   491  
   492  	return nil
   493  }
   494  
   495  func (l *LocalETLFS) ensureDir(nativePath string) error {
   496  	nativePath = filepath.Clean(nativePath)
   497  	if nativePath == "" {
   498  		return nil
   499  	}
   500  
   501  	// check existence by l.dirFiles
   502  	l.RLock()
   503  	_, ok := l.dirFiles[nativePath]
   504  	if ok {
   505  		// dir existed
   506  		l.RUnlock()
   507  		return nil
   508  	}
   509  	l.RUnlock()
   510  
   511  	// check existence by fstat
   512  	_, err := os.Stat(nativePath)
   513  	if err == nil {
   514  		// existed
   515  		return nil
   516  	}
   517  
   518  	// ensure parent
   519  	parent, _ := filepath.Split(nativePath)
   520  	if parent != nativePath {
   521  		if err := l.ensureDir(parent); err != nil {
   522  			return err
   523  		}
   524  	}
   525  
   526  	// create
   527  	if err := os.Mkdir(nativePath, 0755); err != nil {
   528  		if os.IsExist(err) {
   529  			// existed
   530  			return nil
   531  		}
   532  		return err
   533  	}
   534  
   535  	// sync parent dir
   536  	if err := l.syncDir(parent); err != nil {
   537  		return err
   538  	}
   539  
   540  	return nil
   541  }
   542  
   543  func (l *LocalETLFS) toOSPath(filePath string) string {
   544  	if os.PathSeparator == '/' {
   545  		return filePath
   546  	}
   547  	return strings.ReplaceAll(filePath, "/", osPathSeparatorStr)
   548  }
   549  
   550  func (l *LocalETLFS) syncDir(nativePath string) error {
   551  	l.Lock()
   552  	f, ok := l.dirFiles[nativePath]
   553  	if !ok {
   554  		var err error
   555  		f, err = os.Open(nativePath)
   556  		if err != nil {
   557  			l.Unlock()
   558  			return err
   559  		}
   560  		l.dirFiles[nativePath] = f
   561  	}
   562  	l.Unlock()
   563  	if err := f.Sync(); err != nil {
   564  		return err
   565  	}
   566  	return nil
   567  }
   568  
   569  func (l *LocalETLFS) toNativeFilePath(filePath string) string {
   570  	return filepath.Join(l.rootPath, l.toOSPath(filePath))
   571  }
   572  
   573  var _ ETLFileService = new(LocalETLFS)
   574  
   575  func (l *LocalETLFS) ETLCompatible() {}
   576  
   577  var _ MutableFileService = new(LocalETLFS)
   578  
   579  func (l *LocalETLFS) NewMutator(ctx context.Context, filePath string) (Mutator, error) {
   580  	path, err := ParsePathAtService(filePath, l.name)
   581  	if err != nil {
   582  		return nil, err
   583  	}
   584  	nativePath := l.toNativeFilePath(path.File)
   585  	f, err := os.OpenFile(nativePath, os.O_RDWR, 0644)
   586  	if os.IsNotExist(err) {
   587  		return nil, moerr.NewFileNotFoundNoCtx(path.File)
   588  	}
   589  	return &LocalETLFSMutator{
   590  		osFile: f,
   591  	}, nil
   592  }
   593  
   594  type LocalETLFSMutator struct {
   595  	osFile *os.File
   596  }
   597  
   598  func (l *LocalETLFSMutator) Mutate(ctx context.Context, entries ...IOEntry) error {
   599  	return l.mutate(ctx, 0, entries...)
   600  }
   601  
   602  func (l *LocalETLFSMutator) Append(ctx context.Context, entries ...IOEntry) error {
   603  	offset, err := l.osFile.Seek(0, io.SeekEnd)
   604  	if err != nil {
   605  		return err
   606  	}
   607  	return l.mutate(ctx, offset, entries...)
   608  }
   609  
   610  func (l *LocalETLFSMutator) mutate(ctx context.Context, baseOffset int64, entries ...IOEntry) error {
   611  	select {
   612  	case <-ctx.Done():
   613  		return ctx.Err()
   614  	default:
   615  	}
   616  
   617  	// write
   618  	for _, entry := range entries {
   619  
   620  		if entry.ReaderForWrite != nil {
   621  			// seek and copy
   622  			_, err := l.osFile.Seek(int64(entry.Offset+baseOffset), 0)
   623  			if err != nil {
   624  				return err
   625  			}
   626  			var buf []byte
   627  			put := ioBufferPool.Get(&buf)
   628  			defer put.Put()
   629  			n, err := io.CopyBuffer(l.osFile, entry.ReaderForWrite, buf)
   630  			if err != nil {
   631  				return err
   632  			}
   633  			if n != entry.Size {
   634  				return moerr.NewSizeNotMatchNoCtx("")
   635  			}
   636  
   637  		} else {
   638  			// WriteAt
   639  			n, err := l.osFile.WriteAt(entry.Data, int64(entry.Offset+baseOffset))
   640  			if err != nil {
   641  				return err
   642  			}
   643  			if int64(n) != entry.Size {
   644  				return moerr.NewSizeNotMatchNoCtx("")
   645  			}
   646  		}
   647  
   648  	}
   649  
   650  	return nil
   651  }
   652  
   653  func (l *LocalETLFSMutator) Close() error {
   654  	// sync
   655  	if err := l.osFile.Sync(); err != nil {
   656  		return err
   657  	}
   658  
   659  	// close
   660  	if err := l.osFile.Close(); err != nil {
   661  		return err
   662  	}
   663  
   664  	return nil
   665  }