github.com/matrixorigin/matrixone@v0.7.0/pkg/fileservice/file_with_checksum.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package fileservice 16 17 import ( 18 "encoding/binary" 19 "hash/crc32" 20 "io" 21 "os" 22 23 "github.com/matrixorigin/matrixone/pkg/common/moerr" 24 ) 25 26 // FileWithChecksum maps file contents to blocks with checksum 27 type FileWithChecksum[T FileLike] struct { 28 underlying T 29 blockSize int 30 blockContentSize int 31 contentOffset int64 32 } 33 34 const ( 35 _ChecksumSize = crc32.Size 36 _BlockContentSize = 2048 - _ChecksumSize 37 _BlockSize = _BlockContentSize + _ChecksumSize 38 ) 39 40 var ( 41 crcTable = crc32.MakeTable(crc32.Castagnoli) 42 43 ErrChecksumNotMatch = moerr.NewInternalErrorNoCtx("checksum not match") 44 ) 45 46 func NewFileWithChecksum[T FileLike]( 47 underlying T, 48 blockContentSize int, 49 ) *FileWithChecksum[T] { 50 return &FileWithChecksum[T]{ 51 underlying: underlying, 52 blockSize: blockContentSize + _ChecksumSize, 53 blockContentSize: blockContentSize, 54 } 55 } 56 57 var _ FileLike = new(FileWithChecksum[*os.File]) 58 59 func (f *FileWithChecksum[T]) ReadAt(buf []byte, offset int64) (n int, err error) { 60 for len(buf) > 0 { 61 blockOffset, offsetInBlock := f.contentOffsetToBlockOffset(offset) 62 var data []byte 63 data, err = f.readBlock(blockOffset) 64 if err != nil && err != io.EOF { 65 // read error 66 return 67 } 68 data = data[offsetInBlock:] 69 nBytes := copy(buf, data) 70 buf = buf[nBytes:] 71 if err == io.EOF && nBytes != len(data) { 72 // not fully read 73 err = nil 74 } 75 offset += int64(nBytes) 76 n += nBytes 77 if err == io.EOF && nBytes == 0 { 78 // no more data 79 break 80 } 81 } 82 return 83 } 84 85 func (f *FileWithChecksum[T]) Read(buf []byte) (n int, err error) { 86 n, err = f.ReadAt(buf, f.contentOffset) 87 f.contentOffset += int64(n) 88 return 89 } 90 91 func (f *FileWithChecksum[T]) WriteAt(buf []byte, offset int64) (n int, err error) { 92 for len(buf) > 0 { 93 94 blockOffset, offsetInBlock := f.contentOffsetToBlockOffset(offset) 95 data, err := f.readBlock(blockOffset) 96 if err != nil && err != io.EOF { 97 return 0, err 98 } 99 100 // extend data 101 if len(data[offsetInBlock:]) == 0 { 102 nAppend := len(buf) 103 if nAppend+len(data) > f.blockContentSize { 104 nAppend = f.blockContentSize - len(data) 105 } 106 data = append(data, make([]byte, nAppend)...) 107 } 108 109 // copy to data 110 nBytes := copy(data[offsetInBlock:], buf) 111 buf = buf[nBytes:] 112 113 checksum := crc32.Checksum(data, crcTable) 114 checksumBytes := make([]byte, _ChecksumSize) 115 binary.LittleEndian.PutUint32(checksumBytes, checksum) 116 if _, err := f.underlying.WriteAt(checksumBytes, blockOffset); err != nil { 117 return n, err 118 } 119 120 if _, err := f.underlying.WriteAt(data, blockOffset+_ChecksumSize); err != nil { 121 return n, err 122 } 123 124 n += nBytes 125 offset += int64(nBytes) 126 } 127 128 return 129 } 130 131 func (f *FileWithChecksum[T]) Write(buf []byte) (n int, err error) { 132 n, err = f.WriteAt(buf, f.contentOffset) 133 f.contentOffset += int64(n) 134 return 135 } 136 137 func (f *FileWithChecksum[T]) Seek(offset int64, whence int) (int64, error) { 138 139 fileSize, err := f.underlying.Seek(0, io.SeekEnd) 140 if err != nil { 141 return 0, err 142 } 143 144 nBlock := ceilingDiv(fileSize, int64(f.blockSize)) 145 contentSize := fileSize - _ChecksumSize*nBlock 146 147 switch whence { 148 case io.SeekStart: 149 f.contentOffset = offset 150 case io.SeekCurrent: 151 f.contentOffset += offset 152 case io.SeekEnd: 153 f.contentOffset = contentSize + offset 154 } 155 156 if f.contentOffset < 0 { 157 f.contentOffset = 0 158 } 159 if f.contentOffset > contentSize { 160 f.contentOffset = contentSize 161 } 162 163 return f.contentOffset, nil 164 } 165 166 func (f *FileWithChecksum[T]) contentOffsetToBlockOffset( 167 contentOffset int64, 168 ) ( 169 blockOffset int64, 170 offsetInBlock int64, 171 ) { 172 173 nBlock := contentOffset / int64(f.blockContentSize) 174 blockOffset += nBlock * int64(f.blockSize) 175 176 offsetInBlock = contentOffset % int64(f.blockContentSize) 177 178 return 179 } 180 181 func (f *FileWithChecksum[T]) readBlock(offset int64) (data []byte, err error) { 182 183 data = make([]byte, f.blockSize) 184 n, err := f.underlying.ReadAt(data, offset) 185 data = data[:n] 186 if err != nil && err != io.EOF { 187 return nil, err 188 } 189 190 if n < _ChecksumSize { 191 // empty 192 return 193 } 194 195 checksum := binary.LittleEndian.Uint32(data[:_ChecksumSize]) 196 data = data[_ChecksumSize:] 197 198 expectedChecksum := crc32.Checksum(data, crcTable) 199 if checksum != expectedChecksum { 200 return nil, ErrChecksumNotMatch 201 } 202 203 return 204 }