github.com/matrixorigin/matrixone@v0.7.0/pkg/fileservice/local_etl_fs.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package fileservice
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"io"
    21  	"os"
    22  	pathpkg "path"
    23  	"path/filepath"
    24  	"sort"
    25  	"strings"
    26  	"sync"
    27  
    28  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    29  )
    30  
// LocalETLFS is a FileService implementation backed by local file system and suitable for ETL operations
type LocalETLFS struct {
	// name is the service name: Name returns it and every path handed to
	// this service is parsed against it.
	name     string
	// rootPath is the native directory that all file paths are joined onto;
	// temporary files for writes are created in its ".tmp" subdirectory.
	rootPath string

	// The embedded RWMutex guards dirFiles.
	sync.RWMutex
	// dirFiles holds open directory handles keyed by native path. syncDir
	// opens a handle on first use and reuses it for later syncs; nothing in
	// this file closes or evicts the handles.
	dirFiles map[string]*os.File

	// createTempDirOnce is the sync.Once associated with creating the ".tmp"
	// directory (see ensureTempDir).
	createTempDirOnce sync.Once
}

// Compile-time check that *LocalETLFS satisfies FileService.
var _ FileService = new(LocalETLFS)
    43  
    44  func NewLocalETLFS(name string, rootPath string) (*LocalETLFS, error) {
    45  	return &LocalETLFS{
    46  		name:     name,
    47  		rootPath: rootPath,
    48  		dirFiles: make(map[string]*os.File),
    49  	}, nil
    50  }
    51  
// Name returns the service name this LocalETLFS was constructed with.
func (l *LocalETLFS) Name() string {
	return l.name
}
    55  
    56  func (l *LocalETLFS) ensureTempDir() (err error) {
    57  	l.createTempDirOnce.Do(func() {
    58  		err = os.MkdirAll(filepath.Join(l.rootPath, ".tmp"), 0755)
    59  	})
    60  	return
    61  }
    62  
    63  func (l *LocalETLFS) Write(ctx context.Context, vector IOVector) error {
    64  	select {
    65  	case <-ctx.Done():
    66  		return ctx.Err()
    67  	default:
    68  	}
    69  
    70  	path, err := ParsePathAtService(vector.FilePath, l.name)
    71  	if err != nil {
    72  		return err
    73  	}
    74  	nativePath := l.toNativeFilePath(path.File)
    75  
    76  	// check existence
    77  	_, err = os.Stat(nativePath)
    78  	if err == nil {
    79  		// existed
    80  		return moerr.NewFileAlreadyExistsNoCtx(path.File)
    81  	}
    82  
    83  	return l.write(ctx, vector)
    84  }
    85  
    86  func (l *LocalETLFS) write(ctx context.Context, vector IOVector) error {
    87  	path, err := ParsePathAtService(vector.FilePath, l.name)
    88  	if err != nil {
    89  		return err
    90  	}
    91  	nativePath := l.toNativeFilePath(path.File)
    92  
    93  	// sort
    94  	sort.Slice(vector.Entries, func(i, j int) bool {
    95  		return vector.Entries[i].Offset < vector.Entries[j].Offset
    96  	})
    97  
    98  	// size
    99  	var size int64
   100  	if len(vector.Entries) > 0 {
   101  		last := vector.Entries[len(vector.Entries)-1]
   102  		size = int64(last.Offset + last.Size)
   103  	}
   104  
   105  	// write
   106  	if err := l.ensureTempDir(); err != nil {
   107  		return err
   108  	}
   109  	f, err := os.CreateTemp(
   110  		filepath.Join(l.rootPath, ".tmp"),
   111  		"*.tmp",
   112  	)
   113  	if err != nil {
   114  		return err
   115  	}
   116  	n, err := io.Copy(f, newIOEntriesReader(ctx, vector.Entries))
   117  	if err != nil {
   118  		return err
   119  	}
   120  	if n != size {
   121  		sizeUnknown := false
   122  		for _, entry := range vector.Entries {
   123  			if entry.Size < 0 {
   124  				sizeUnknown = true
   125  				break
   126  			}
   127  		}
   128  		if !sizeUnknown {
   129  			return moerr.NewSizeNotMatchNoCtx(path.File)
   130  		}
   131  	}
   132  	if err := f.Close(); err != nil {
   133  		return err
   134  	}
   135  
   136  	// ensure parent dir
   137  	parentDir, _ := filepath.Split(nativePath)
   138  	err = l.ensureDir(parentDir)
   139  	if err != nil {
   140  		return err
   141  	}
   142  
   143  	// move
   144  	if err := os.Rename(f.Name(), nativePath); err != nil {
   145  		return err
   146  	}
   147  
   148  	if err := l.syncDir(parentDir); err != nil {
   149  		return err
   150  	}
   151  
   152  	return nil
   153  }
   154  
   155  func (l *LocalETLFS) Read(ctx context.Context, vector *IOVector) error {
   156  	select {
   157  	case <-ctx.Done():
   158  		return ctx.Err()
   159  	default:
   160  	}
   161  
   162  	if len(vector.Entries) == 0 {
   163  		return moerr.NewEmptyVectorNoCtx()
   164  	}
   165  
   166  	path, err := ParsePathAtService(vector.FilePath, l.name)
   167  	if err != nil {
   168  		return err
   169  	}
   170  	nativePath := l.toNativeFilePath(path.File)
   171  
   172  	_, err = os.Stat(nativePath)
   173  	if os.IsNotExist(err) {
   174  		return moerr.NewFileNotFoundNoCtx(path.File)
   175  	}
   176  	if err != nil {
   177  		return err
   178  	}
   179  
   180  	for i, entry := range vector.Entries {
   181  		if entry.Size == 0 {
   182  			return moerr.NewEmptyRangeNoCtx(path.File)
   183  		}
   184  
   185  		if entry.done {
   186  			continue
   187  		}
   188  
   189  		if entry.WriterForRead != nil {
   190  			f, err := os.Open(nativePath)
   191  			if os.IsNotExist(err) {
   192  				return moerr.NewFileNotFoundNoCtx(path.File)
   193  			}
   194  			if err != nil {
   195  				return err
   196  			}
   197  			defer f.Close()
   198  			if entry.Offset > 0 {
   199  				if _, err := f.Seek(int64(entry.Offset), io.SeekStart); err != nil {
   200  					return err
   201  				}
   202  			}
   203  			r := (io.Reader)(f)
   204  			if entry.Size > 0 {
   205  				r = io.LimitReader(r, int64(entry.Size))
   206  			}
   207  
   208  			if entry.ToObject != nil {
   209  				r = io.TeeReader(r, entry.WriterForRead)
   210  				cr := &countingReader{
   211  					R: r,
   212  				}
   213  				obj, size, err := entry.ToObject(cr, nil)
   214  				if err != nil {
   215  					return err
   216  				}
   217  				vector.Entries[i].Object = obj
   218  				vector.Entries[i].ObjectSize = size
   219  				if entry.Size > 0 && cr.N != entry.Size {
   220  					return moerr.NewUnexpectedEOFNoCtx(path.File)
   221  				}
   222  
   223  			} else {
   224  				n, err := io.Copy(entry.WriterForRead, r)
   225  				if err != nil {
   226  					return err
   227  				}
   228  				if entry.Size > 0 && n != int64(entry.Size) {
   229  					return moerr.NewUnexpectedEOFNoCtx(path.File)
   230  				}
   231  			}
   232  
   233  		} else if entry.ReadCloserForRead != nil {
   234  			f, err := os.Open(nativePath)
   235  			if os.IsNotExist(err) {
   236  				return moerr.NewFileNotFoundNoCtx(path.File)
   237  			}
   238  			if err != nil {
   239  				return err
   240  			}
   241  			if entry.Offset > 0 {
   242  				if _, err := f.Seek(int64(entry.Offset), io.SeekStart); err != nil {
   243  					return err
   244  				}
   245  			}
   246  			r := (io.Reader)(f)
   247  			if entry.Size > 0 {
   248  				r = io.LimitReader(r, int64(entry.Size))
   249  			}
   250  			if entry.ToObject == nil {
   251  				*entry.ReadCloserForRead = &readCloser{
   252  					r:         r,
   253  					closeFunc: f.Close,
   254  				}
   255  			} else {
   256  				buf := new(bytes.Buffer)
   257  				*entry.ReadCloserForRead = &readCloser{
   258  					r: io.TeeReader(r, buf),
   259  					closeFunc: func() error {
   260  						defer f.Close()
   261  						obj, size, err := entry.ToObject(buf, buf.Bytes())
   262  						if err != nil {
   263  							return err
   264  						}
   265  						vector.Entries[i].Object = obj
   266  						vector.Entries[i].ObjectSize = size
   267  						return nil
   268  					},
   269  				}
   270  			}
   271  
   272  		} else {
   273  			f, err := os.Open(nativePath)
   274  			if os.IsNotExist(err) {
   275  				return moerr.NewFileNotFoundNoCtx(path.File)
   276  			}
   277  			if err != nil {
   278  				return err
   279  			}
   280  			defer f.Close()
   281  
   282  			if entry.Offset > 0 {
   283  				_, err = f.Seek(int64(entry.Offset), io.SeekStart)
   284  				if err != nil {
   285  					return err
   286  				}
   287  			}
   288  			r := (io.Reader)(f)
   289  			if entry.Size > 0 {
   290  				r = io.LimitReader(r, int64(entry.Size))
   291  			}
   292  
   293  			if entry.Size < 0 {
   294  				data, err := io.ReadAll(r)
   295  				if err != nil {
   296  					return err
   297  				}
   298  				entry.Data = data
   299  				entry.Size = int64(len(data))
   300  
   301  			} else {
   302  				if int64(len(entry.Data)) < entry.Size {
   303  					entry.Data = make([]byte, entry.Size)
   304  				}
   305  				n, err := io.ReadFull(r, entry.Data)
   306  				if err != nil {
   307  					return err
   308  				}
   309  				if int64(n) != entry.Size {
   310  					return moerr.NewUnexpectedEOFNoCtx(path.File)
   311  				}
   312  			}
   313  
   314  			if err := entry.setObjectFromData(); err != nil {
   315  				return err
   316  			}
   317  
   318  			vector.Entries[i] = entry
   319  		}
   320  
   321  	}
   322  
   323  	return nil
   324  
   325  }
   326  
   327  func (l *LocalETLFS) StatFile(ctx context.Context, filePath string) (*DirEntry, error) {
   328  	select {
   329  	case <-ctx.Done():
   330  		return nil, ctx.Err()
   331  	default:
   332  	}
   333  
   334  	path, err := ParsePathAtService(filePath, l.name)
   335  	if err != nil {
   336  		return nil, err
   337  	}
   338  	nativePath := l.toNativeFilePath(path.File)
   339  
   340  	stat, err := os.Stat(nativePath)
   341  	if os.IsNotExist(err) {
   342  		return nil, moerr.NewFileNotFoundNoCtx(path.File)
   343  	}
   344  	if err != nil {
   345  		return nil, err
   346  	}
   347  
   348  	if stat.IsDir() {
   349  		return nil, moerr.NewFileNotFoundNoCtx(path.File)
   350  	}
   351  
   352  	return &DirEntry{
   353  		Name:  pathpkg.Base(filePath),
   354  		IsDir: false,
   355  		Size:  stat.Size(),
   356  	}, nil
   357  }
   358  
   359  func (l *LocalETLFS) List(ctx context.Context, dirPath string) (ret []DirEntry, err error) {
   360  	select {
   361  	case <-ctx.Done():
   362  		return nil, ctx.Err()
   363  	default:
   364  	}
   365  
   366  	path, err := ParsePathAtService(dirPath, l.name)
   367  	if err != nil {
   368  		return nil, err
   369  	}
   370  	nativePath := l.toNativeFilePath(path.File)
   371  
   372  	f, err := os.Open(nativePath)
   373  	if os.IsNotExist(err) {
   374  		err = nil
   375  		return
   376  	}
   377  	if err != nil {
   378  		return nil, err
   379  	}
   380  	defer f.Close()
   381  
   382  	entries, err := f.ReadDir(-1)
   383  	for _, entry := range entries {
   384  		name := entry.Name()
   385  		if strings.HasPrefix(name, ".") {
   386  			continue
   387  		}
   388  		info, err := entry.Info()
   389  		if err != nil {
   390  			return nil, err
   391  		}
   392  		isDir, err := entryIsDir(nativePath, name, info)
   393  		if err != nil {
   394  			return nil, err
   395  		}
   396  		ret = append(ret, DirEntry{
   397  			Name:  name,
   398  			IsDir: isDir,
   399  			Size:  info.Size(),
   400  		})
   401  	}
   402  
   403  	sort.Slice(ret, func(i, j int) bool {
   404  		return ret[i].Name < ret[j].Name
   405  	})
   406  
   407  	if err != nil {
   408  		return ret, err
   409  	}
   410  
   411  	return
   412  }
   413  
   414  func (l *LocalETLFS) Delete(ctx context.Context, filePaths ...string) error {
   415  	select {
   416  	case <-ctx.Done():
   417  		return ctx.Err()
   418  	default:
   419  	}
   420  
   421  	for _, filePath := range filePaths {
   422  		if err := l.deleteSingle(ctx, filePath); err != nil {
   423  			return err
   424  		}
   425  	}
   426  	return nil
   427  }
   428  
   429  func (l *LocalETLFS) deleteSingle(ctx context.Context, filePath string) error {
   430  	path, err := ParsePathAtService(filePath, l.name)
   431  	if err != nil {
   432  		return err
   433  	}
   434  	nativePath := l.toNativeFilePath(path.File)
   435  
   436  	_, err = os.Stat(nativePath)
   437  	if os.IsNotExist(err) {
   438  		return moerr.NewFileNotFoundNoCtx(path.File)
   439  	}
   440  	if err != nil {
   441  		return err
   442  	}
   443  
   444  	err = os.Remove(nativePath)
   445  	if err != nil {
   446  		return err
   447  	}
   448  
   449  	parentDir, _ := filepath.Split(nativePath)
   450  	err = l.syncDir(parentDir)
   451  	if err != nil {
   452  		return err
   453  	}
   454  
   455  	return nil
   456  }
   457  
   458  func (l *LocalETLFS) ensureDir(nativePath string) error {
   459  	nativePath = filepath.Clean(nativePath)
   460  	if nativePath == "" {
   461  		return nil
   462  	}
   463  
   464  	// check existence by l.dirFiles
   465  	l.RLock()
   466  	_, ok := l.dirFiles[nativePath]
   467  	if ok {
   468  		// dir existed
   469  		l.RUnlock()
   470  		return nil
   471  	}
   472  	l.RUnlock()
   473  
   474  	// check existence by fstat
   475  	_, err := os.Stat(nativePath)
   476  	if err == nil {
   477  		// existed
   478  		return nil
   479  	}
   480  
   481  	// ensure parent
   482  	parent, _ := filepath.Split(nativePath)
   483  	if parent != nativePath {
   484  		if err := l.ensureDir(parent); err != nil {
   485  			return err
   486  		}
   487  	}
   488  
   489  	// create
   490  	if err := os.Mkdir(nativePath, 0755); err != nil {
   491  		return err
   492  	}
   493  
   494  	// sync parent dir
   495  	if err := l.syncDir(parent); err != nil {
   496  		return err
   497  	}
   498  
   499  	return nil
   500  }
   501  
   502  func (l *LocalETLFS) toOSPath(filePath string) string {
   503  	if os.PathSeparator == '/' {
   504  		return filePath
   505  	}
   506  	return strings.ReplaceAll(filePath, "/", osPathSeparatorStr)
   507  }
   508  
   509  func (l *LocalETLFS) syncDir(nativePath string) error {
   510  	l.Lock()
   511  	f, ok := l.dirFiles[nativePath]
   512  	if !ok {
   513  		var err error
   514  		f, err = os.Open(nativePath)
   515  		if err != nil {
   516  			l.Unlock()
   517  			return err
   518  		}
   519  		l.dirFiles[nativePath] = f
   520  	}
   521  	l.Unlock()
   522  	if err := f.Sync(); err != nil {
   523  		return err
   524  	}
   525  	return nil
   526  }
   527  
   528  func (l *LocalETLFS) toNativeFilePath(filePath string) string {
   529  	return filepath.Join(l.rootPath, l.toOSPath(filePath))
   530  }
   531  
// Compile-time check that *LocalETLFS satisfies ETLFileService.
var _ ETLFileService = new(LocalETLFS)

// ETLCompatible is an empty marker method with no behavior of its own.
// NOTE(review): presumably this is the method ETLFileService requires —
// confirm against the interface definition.
func (l *LocalETLFS) ETLCompatible() {}
   535  
   536  var _ MutableFileService = new(LocalETLFS)
   537  
   538  func (l *LocalETLFS) NewMutator(filePath string) (Mutator, error) {
   539  	path, err := ParsePathAtService(filePath, l.name)
   540  	if err != nil {
   541  		return nil, err
   542  	}
   543  	nativePath := l.toNativeFilePath(path.File)
   544  	f, err := os.OpenFile(nativePath, os.O_RDWR, 0644)
   545  	if os.IsNotExist(err) {
   546  		return nil, moerr.NewFileNotFoundNoCtx(path.File)
   547  	}
   548  	return &LocalETLFSMutator{
   549  		osFile: f,
   550  	}, nil
   551  }
   552  
// LocalETLFSMutator modifies a single file in place. It is created by
// LocalETLFS.NewMutator and must be closed with Close when done.
type LocalETLFSMutator struct {
	// osFile is the file being mutated, opened for reading and writing.
	osFile *os.File
}
   556  
   557  func (l *LocalETLFSMutator) Mutate(ctx context.Context, entries ...IOEntry) error {
   558  	return l.mutate(ctx, 0, entries...)
   559  }
   560  
   561  func (l *LocalETLFSMutator) Append(ctx context.Context, entries ...IOEntry) error {
   562  	offset, err := l.osFile.Seek(0, io.SeekEnd)
   563  	if err != nil {
   564  		return err
   565  	}
   566  	return l.mutate(ctx, offset, entries...)
   567  }
   568  
   569  func (l *LocalETLFSMutator) mutate(ctx context.Context, baseOffset int64, entries ...IOEntry) error {
   570  	select {
   571  	case <-ctx.Done():
   572  		return ctx.Err()
   573  	default:
   574  	}
   575  
   576  	// write
   577  	for _, entry := range entries {
   578  
   579  		if entry.ReaderForWrite != nil {
   580  			// seek and copy
   581  			_, err := l.osFile.Seek(int64(entry.Offset+baseOffset), 0)
   582  			if err != nil {
   583  				return err
   584  			}
   585  			n, err := io.Copy(l.osFile, entry.ReaderForWrite)
   586  			if err != nil {
   587  				return err
   588  			}
   589  			if n != entry.Size {
   590  				return moerr.NewSizeNotMatchNoCtx("")
   591  			}
   592  
   593  		} else {
   594  			// WriteAt
   595  			n, err := l.osFile.WriteAt(entry.Data, int64(entry.Offset+baseOffset))
   596  			if err != nil {
   597  				return err
   598  			}
   599  			if int64(n) != entry.Size {
   600  				return moerr.NewSizeNotMatchNoCtx("")
   601  			}
   602  		}
   603  
   604  	}
   605  
   606  	return nil
   607  }
   608  
   609  func (l *LocalETLFSMutator) Close() error {
   610  	// sync
   611  	if err := l.osFile.Sync(); err != nil {
   612  		return err
   613  	}
   614  
   615  	// close
   616  	if err := l.osFile.Close(); err != nil {
   617  		return err
   618  	}
   619  
   620  	return nil
   621  }