github.com/matrixorigin/matrixone@v1.2.0/pkg/fileservice/file_with_checksum.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package fileservice
    16  
    17  import (
    18  	"context"
    19  	"encoding/binary"
    20  	"hash/crc32"
    21  	"io"
    22  	"os"
    23  
    24  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    25  	"github.com/matrixorigin/matrixone/pkg/perfcounter"
    26  )
    27  
// FileWithChecksum maps file contents to blocks with checksum.
//
// The underlying file is treated as a sequence of blocks of blockSize bytes.
// Each block starts with a _ChecksumSize-byte little-endian CRC-32 (computed
// with crcTable) followed by up to blockContentSize bytes of content. Callers
// address the content only; checksum bytes are skipped transparently by
// ReadAt / WriteAt.
//
// Fields:
//   - ctx: passed to perfcounter.Update whenever counters are recorded.
//   - underlying: the wrapped file that stores the checksummed blocks.
//   - blockSize: size of one block in the underlying file, set by the
//     constructors to blockContentSize + _ChecksumSize.
//   - blockContentSize: number of content bytes carried by one block.
//   - contentOffset: current content position used by Read, Write and Seek.
//   - perfCounterSets: extra counter sets forwarded to perfcounter.Update.
type FileWithChecksum[T FileLike] struct {
	ctx              context.Context
	underlying       T
	blockSize        int
	blockContentSize int
	contentOffset    int64
	perfCounterSets  []*perfcounter.CounterSet
}
    37  
// Block layout constants.
//
//   - _ChecksumSize: number of bytes of the CRC-32 checksum stored at the
//     start of every block (crc32.Size).
//   - _DefaultBlockSize: default size in bytes of one block in the underlying
//     file, checksum included. readBlock uses a pooled buffer when a file's
//     block size equals this value.
//   - _BlockContentSize: content bytes per block when the default block size
//     is used.
//   - _BlockSize: _BlockContentSize plus the checksum; by construction this
//     equals _DefaultBlockSize.
const (
	_ChecksumSize     = crc32.Size
	_DefaultBlockSize = 2048
	_BlockContentSize = _DefaultBlockSize - _ChecksumSize
	_BlockSize        = _BlockContentSize + _ChecksumSize
)
    44  
    45  var (
    46  	crcTable = crc32.MakeTable(crc32.Castagnoli)
    47  )
    48  
    49  func NewFileWithChecksum[T FileLike](
    50  	ctx context.Context,
    51  	underlying T,
    52  	blockContentSize int,
    53  	perfCounterSets []*perfcounter.CounterSet,
    54  ) *FileWithChecksum[T] {
    55  	return &FileWithChecksum[T]{
    56  		ctx:              ctx,
    57  		underlying:       underlying,
    58  		blockSize:        blockContentSize + _ChecksumSize,
    59  		blockContentSize: blockContentSize,
    60  		perfCounterSets:  perfCounterSets,
    61  	}
    62  }
    63  
    64  func NewFileWithChecksumOSFile(
    65  	ctx context.Context,
    66  	underlying *os.File,
    67  	blockContentSize int,
    68  	perfCounterSets []*perfcounter.CounterSet,
    69  ) (*FileWithChecksum[*os.File], PutBack[*FileWithChecksum[*os.File]]) {
    70  	var f *FileWithChecksum[*os.File]
    71  	put := fileWithChecksumPoolOSFile.Get(&f)
    72  	f.ctx = ctx
    73  	f.underlying = underlying
    74  	f.blockSize = blockContentSize + _ChecksumSize
    75  	f.blockContentSize = blockContentSize
    76  	f.perfCounterSets = perfCounterSets
    77  	return f, put
    78  }
    79  
// fileWithChecksumPoolOSFile is the pool that NewFileWithChecksumOSFile draws
// its *FileWithChecksum[*os.File] values from.
//
// Arguments passed to NewPool, in order:
//   - 1024: presumably the pool capacity (confirm against NewPool).
//   - a constructor returning a fresh zero-valued wrapper.
//   - a function that overwrites a wrapper with emptyFileWithChecksumOSFile,
//     i.e. resets every field to its zero value (when NewPool invokes it is
//     defined by NewPool, not visible here).
//   - nil: no function supplied for NewPool's last parameter.
var fileWithChecksumPoolOSFile = NewPool(
	1024,
	func() *FileWithChecksum[*os.File] {
		return new(FileWithChecksum[*os.File])
	},
	func(f *FileWithChecksum[*os.File]) {
		*f = emptyFileWithChecksumOSFile
	},
	nil,
)

// emptyFileWithChecksumOSFile is the zero value copied over pooled wrappers
// by the pool's reset function above.
var emptyFileWithChecksumOSFile FileWithChecksum[*os.File]

// Compile-time check that *FileWithChecksum[*os.File] satisfies FileLike.
var _ FileLike = new(FileWithChecksum[*os.File])
    94  
    95  func (f *FileWithChecksum[T]) ReadAt(buf []byte, offset int64) (n int, err error) {
    96  	defer func() {
    97  		perfcounter.Update(f.ctx, func(c *perfcounter.CounterSet) {
    98  			c.FileService.FileWithChecksum.Read.Add(int64(n))
    99  		}, f.perfCounterSets...)
   100  	}()
   101  
   102  	for len(buf) > 0 {
   103  
   104  		blockOffset, offsetInBlock := f.contentOffsetToBlockOffset(offset)
   105  		var data []byte
   106  		var putback PutBack[[]byte]
   107  		data, putback, err = f.readBlock(blockOffset)
   108  		if err != nil && err != io.EOF {
   109  			// read error
   110  			putback.Put()
   111  			return
   112  		}
   113  
   114  		data = data[offsetInBlock:]
   115  		nBytes := copy(buf, data)
   116  		buf = buf[nBytes:]
   117  		if err == io.EOF && nBytes != len(data) {
   118  			// not fully read
   119  			err = nil
   120  		}
   121  		putback.Put()
   122  
   123  		offset += int64(nBytes)
   124  		n += nBytes
   125  		if err == io.EOF && nBytes == 0 {
   126  			// no more data
   127  			break
   128  		}
   129  
   130  	}
   131  	return
   132  }
   133  
   134  func (f *FileWithChecksum[T]) Read(buf []byte) (n int, err error) {
   135  	n, err = f.ReadAt(buf, f.contentOffset)
   136  	f.contentOffset += int64(n)
   137  	return
   138  }
   139  
   140  func (f *FileWithChecksum[T]) WriteAt(buf []byte, offset int64) (n int, err error) {
   141  	defer func() {
   142  		perfcounter.Update(f.ctx, func(c *perfcounter.CounterSet) {
   143  			c.FileService.FileWithChecksum.Write.Add(int64(n))
   144  		}, f.perfCounterSets...)
   145  	}()
   146  
   147  	for len(buf) > 0 {
   148  
   149  		blockOffset, offsetInBlock := f.contentOffsetToBlockOffset(offset)
   150  		data, putback, err := f.readBlock(blockOffset)
   151  		if err != nil && err != io.EOF {
   152  			putback.Put()
   153  			return 0, err
   154  		}
   155  
   156  		// extend data
   157  		if len(data[offsetInBlock:]) == 0 {
   158  			nAppend := len(buf)
   159  			if nAppend+len(data) > f.blockContentSize {
   160  				nAppend = f.blockContentSize - len(data)
   161  			}
   162  			data = append(data, make([]byte, nAppend)...)
   163  		}
   164  
   165  		// copy to data
   166  		nBytes := copy(data[offsetInBlock:], buf)
   167  		buf = buf[nBytes:]
   168  
   169  		checksum := crc32.Checksum(data, crcTable)
   170  		checksumBytes := make([]byte, _ChecksumSize)
   171  		binary.LittleEndian.PutUint32(checksumBytes, checksum)
   172  		if n, err := f.underlying.WriteAt(checksumBytes, blockOffset); err != nil {
   173  			putback.Put()
   174  			return n, err
   175  		} else {
   176  			perfcounter.Update(f.ctx, func(c *perfcounter.CounterSet) {
   177  				c.FileService.FileWithChecksum.UnderlyingWrite.Add(int64(n))
   178  			}, f.perfCounterSets...)
   179  		}
   180  
   181  		if n, err := f.underlying.WriteAt(data, blockOffset+_ChecksumSize); err != nil {
   182  			putback.Put()
   183  			return n, err
   184  		} else {
   185  			perfcounter.Update(f.ctx, func(c *perfcounter.CounterSet) {
   186  				c.FileService.FileWithChecksum.UnderlyingWrite.Add(int64(n))
   187  			}, f.perfCounterSets...)
   188  		}
   189  
   190  		putback.Put()
   191  
   192  		n += nBytes
   193  		offset += int64(nBytes)
   194  	}
   195  
   196  	return
   197  }
   198  
   199  func (f *FileWithChecksum[T]) Write(buf []byte) (n int, err error) {
   200  	n, err = f.WriteAt(buf, f.contentOffset)
   201  	f.contentOffset += int64(n)
   202  	return
   203  }
   204  
   205  func (f *FileWithChecksum[T]) Seek(offset int64, whence int) (int64, error) {
   206  
   207  	fileSize, err := f.underlying.Seek(0, io.SeekEnd)
   208  	if err != nil {
   209  		return 0, err
   210  	}
   211  
   212  	nBlock := ceilingDiv(fileSize, int64(f.blockSize))
   213  	contentSize := fileSize - _ChecksumSize*nBlock
   214  
   215  	switch whence {
   216  	case io.SeekStart:
   217  		f.contentOffset = offset
   218  	case io.SeekCurrent:
   219  		f.contentOffset += offset
   220  	case io.SeekEnd:
   221  		f.contentOffset = contentSize + offset
   222  	}
   223  
   224  	if f.contentOffset < 0 {
   225  		f.contentOffset = 0
   226  	}
   227  	if f.contentOffset > contentSize {
   228  		f.contentOffset = contentSize
   229  	}
   230  
   231  	return f.contentOffset, nil
   232  }
   233  
   234  func (f *FileWithChecksum[T]) contentOffsetToBlockOffset(
   235  	contentOffset int64,
   236  ) (
   237  	blockOffset int64,
   238  	offsetInBlock int64,
   239  ) {
   240  
   241  	nBlock := contentOffset / int64(f.blockContentSize)
   242  	blockOffset += nBlock * int64(f.blockSize)
   243  
   244  	offsetInBlock = contentOffset % int64(f.blockContentSize)
   245  
   246  	return
   247  }
   248  
   249  func (f *FileWithChecksum[T]) readBlock(offset int64) (data []byte, putback PutBack[[]byte], err error) {
   250  
   251  	if f.blockSize == _DefaultBlockSize {
   252  		putback = bytesPoolDefaultBlockSize.Get(&data)
   253  	} else {
   254  		data = make([]byte, f.blockSize)
   255  		// putback does not need ptr, bytesPoolDefaultBlockSize put is a no-op
   256  		putback = PutBack[[]byte]{-1, nil, nil}
   257  	}
   258  
   259  	n, err := f.underlying.ReadAt(data, offset)
   260  	data = data[:n]
   261  	if err != nil && err != io.EOF {
   262  		return nil, putback, err
   263  	}
   264  
   265  	perfcounter.Update(f.ctx, func(c *perfcounter.CounterSet) {
   266  		c.FileService.FileWithChecksum.UnderlyingRead.Add(int64(n))
   267  	}, f.perfCounterSets...)
   268  
   269  	if n < _ChecksumSize {
   270  		// empty
   271  		return
   272  	}
   273  
   274  	checksum := binary.LittleEndian.Uint32(data[:_ChecksumSize])
   275  	data = data[_ChecksumSize:]
   276  
   277  	expectedChecksum := crc32.Checksum(data, crcTable)
   278  	if checksum != expectedChecksum {
   279  		return nil, putback, moerr.NewInternalErrorNoCtx("checksum not match")
   280  	}
   281  
   282  	return
   283  }