github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/persist/fs/index_write.go (about) 1 // Copyright (c) 2020 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package fs 22 23 import ( 24 "bufio" 25 "errors" 26 "fmt" 27 "io/fs" 28 "io/ioutil" 29 "os" 30 "time" 31 32 "github.com/m3db/m3/src/dbnode/digest" 33 "github.com/m3db/m3/src/dbnode/generated/proto/index" 34 "github.com/m3db/m3/src/dbnode/persist" 35 idxpersist "github.com/m3db/m3/src/m3ninx/persist" 36 xerrors "github.com/m3db/m3/src/x/errors" 37 xos "github.com/m3db/m3/src/x/os" 38 xtime "github.com/m3db/m3/src/x/time" 39 40 protobuftypes "github.com/gogo/protobuf/types" 41 ) 42 43 const ( 44 indexFileSetMajorVersion = 1 45 46 // indexWriteBufferSize is set to 250kb to avoid very frequent 47 // syscall overhead using the default buffer size (lot of large 48 // files written when writing the index). 49 indexWriteBufferSize = 2 << 17 // ~250kb 50 ) 51 52 var ( 53 errIndexFileSetWriterReturnsNoFiles = errors.New( 54 "index file set writer returned zero file types") 55 errIndexFileSetWriterOpenWithNoShards = errors.New( 56 "index file set writer opened with no shards specified") 57 ) 58 59 type indexWriter struct { 60 opts Options 61 filePathPrefix string 62 newFileMode os.FileMode 63 newDirectoryMode os.FileMode 64 fdWithDigest digest.FdWithDigestWriter 65 66 err error 67 blockSize time.Duration 68 start xtime.UnixNano 69 fileSetType persist.FileSetType 70 snapshotTime xtime.UnixNano 71 volumeIndex int 72 indexVolumeType idxpersist.IndexVolumeType 73 shards map[uint32]struct{} 74 segments []writtenIndexSegment 75 76 namespaceDir string 77 checkpointFilePath string 78 infoFilePath string 79 digestFilePath string 80 } 81 82 type writtenIndexSegment struct { 83 segmentType idxpersist.IndexSegmentType 84 majorVersion int 85 minorVersion int 86 metadata []byte 87 files []writtenIndexSegmentFile 88 } 89 90 type writtenIndexSegmentFile struct { 91 segmentFileType idxpersist.IndexSegmentFileType 92 digest uint32 93 } 94 95 // NewIndexWriter returns a new index writer with options. 96 func NewIndexWriter(opts Options) (IndexFileSetWriter, error) { 97 if err := opts.Validate(); err != nil { 98 return nil, err 99 } 100 return &indexWriter{ 101 opts: opts, 102 filePathPrefix: opts.FilePathPrefix(), 103 newFileMode: opts.NewFileMode(), 104 newDirectoryMode: opts.NewDirectoryMode(), 105 fdWithDigest: digest.NewFdWithDigestWriter(indexWriteBufferSize), 106 }, nil 107 } 108 109 func (w *indexWriter) Open(opts IndexWriterOpenOptions) error { 110 if len(opts.Shards) == 0 { 111 return errIndexFileSetWriterOpenWithNoShards 112 } 113 114 var ( 115 namespace = opts.Identifier.Namespace 116 blockStart = opts.Identifier.BlockStart 117 ) 118 w.err = nil 119 w.blockSize = opts.BlockSize 120 w.start = blockStart 121 w.fileSetType = opts.FileSetType 122 w.volumeIndex = opts.Identifier.VolumeIndex 123 w.shards = opts.Shards 124 w.snapshotTime = opts.Snapshot.SnapshotTime 125 w.indexVolumeType = opts.IndexVolumeType 126 if w.indexVolumeType == "" { 127 w.indexVolumeType = idxpersist.DefaultIndexVolumeType 128 } 129 w.segments = nil 130 131 switch opts.FileSetType { 132 case persist.FileSetSnapshotType: 133 w.namespaceDir = NamespaceIndexSnapshotDirPath(w.filePathPrefix, namespace) 134 case persist.FileSetFlushType: 135 w.namespaceDir = NamespaceIndexDataDirPath(w.filePathPrefix, namespace) 136 default: 137 return fmt.Errorf("cannot open index writer for fileset type: %s", opts.FileSetType) 138 } 139 if err := os.MkdirAll(w.namespaceDir, w.newDirectoryMode); err != nil { 140 return err 141 } 142 w.infoFilePath = FilesetPathFromTimeAndIndex(w.namespaceDir, blockStart, w.volumeIndex, InfoFileSuffix) 143 w.digestFilePath = FilesetPathFromTimeAndIndex(w.namespaceDir, blockStart, w.volumeIndex, DigestFileSuffix) 144 w.checkpointFilePath = FilesetPathFromTimeAndIndex(w.namespaceDir, blockStart, w.volumeIndex, CheckpointFileSuffix) 145 146 exists, err := CompleteCheckpointFileExists(w.checkpointFilePath) 147 if err != nil { 148 return err 149 } 150 if exists { 151 return xerrors.Wrapf(fs.ErrExist, 152 "checkpoint already exists for volume: %s", 153 w.checkpointFilePath) 154 } 155 156 // NB: Write out an incomplete index info file when we start writing a volume, 157 // this is later used in the cleanup of corrupted/incomplete index filesets. 158 infoFileData, err := w.infoFileData() 159 if err != nil { 160 return err 161 } 162 163 return w.writeInfoFile(infoFileData) 164 } 165 166 func (w *indexWriter) WriteSegmentFileSet( 167 segmentFileSet idxpersist.IndexSegmentFileSetWriter, 168 ) error { 169 if w.err != nil { 170 return w.err 171 } 172 173 segType := segmentFileSet.SegmentType() 174 if err := segType.Validate(); err != nil { 175 return w.markSegmentWriteError(segType, "", err) 176 } 177 178 seg := writtenIndexSegment{ 179 segmentType: segType, 180 majorVersion: segmentFileSet.MajorVersion(), 181 minorVersion: segmentFileSet.MinorVersion(), 182 metadata: segmentFileSet.SegmentMetadata(), 183 } 184 185 files := segmentFileSet.Files() 186 if len(files) == 0 { 187 return w.markSegmentWriteError(segType, "", 188 errIndexFileSetWriterReturnsNoFiles) 189 } 190 191 idx := len(w.segments) 192 for _, segFileType := range files { 193 if err := segFileType.Validate(); err != nil { 194 return w.markSegmentWriteError(segType, segFileType, err) 195 } 196 197 var filePath string 198 switch w.fileSetType { 199 case persist.FileSetSnapshotType: 200 filePath = snapshotIndexSegmentFilePathFromTimeAndIndex(w.namespaceDir, w.start, w.volumeIndex, 201 idx, segFileType) 202 case persist.FileSetFlushType: 203 filePath = filesetIndexSegmentFilePathFromTime(w.namespaceDir, w.start, w.volumeIndex, 204 idx, segFileType) 205 default: 206 err := fmt.Errorf("unknown fileset type: %s", w.fileSetType) 207 return w.markSegmentWriteError(segType, segFileType, err) 208 } 209 210 fd, err := OpenWritable(filePath, w.newFileMode) 211 if err != nil { 212 return w.markSegmentWriteError(segType, segFileType, err) 213 } 214 215 // Use buffered IO writer to write the file in case the reader 216 // returns small chunks of data 217 w.fdWithDigest.Reset(fd) 218 digest := w.fdWithDigest.Digest() 219 writer := bufio.NewWriter(w.fdWithDigest) 220 writeErr := segmentFileSet.WriteFile(segFileType, writer) 221 err = xerrors.FirstError(writeErr, writer.Flush(), w.fdWithDigest.Close()) 222 if err != nil { 223 return w.markSegmentWriteError(segType, segFileType, err) 224 } 225 226 seg.files = append(seg.files, writtenIndexSegmentFile{ 227 segmentFileType: segFileType, 228 digest: digest.Sum32(), 229 }) 230 } 231 232 w.segments = append(w.segments, seg) 233 return nil 234 } 235 236 func (w *indexWriter) markSegmentWriteError( 237 segType idxpersist.IndexSegmentType, 238 segFileType idxpersist.IndexSegmentFileType, 239 err error, 240 ) error { 241 w.err = fmt.Errorf("failed to write segment_type=%s, segment_file_type=%s: %v", 242 segType, segFileType, err) 243 return w.err 244 } 245 246 func (w *indexWriter) infoFileData() ([]byte, error) { 247 shards := make([]uint32, 0, len(w.shards)) 248 for shard := range w.shards { 249 shards = append(shards, shard) 250 } 251 info := &index.IndexVolumeInfo{ 252 MajorVersion: indexFileSetMajorVersion, 253 BlockStart: int64(w.start), 254 BlockSize: int64(w.blockSize), 255 FileType: int64(w.fileSetType), 256 Shards: shards, 257 SnapshotTime: int64(w.snapshotTime), 258 IndexVolumeType: &protobuftypes.StringValue{ 259 Value: string(w.indexVolumeType), 260 }, 261 } 262 for _, segment := range w.segments { 263 segmentInfo := &index.SegmentInfo{ 264 SegmentType: string(segment.segmentType), 265 MajorVersion: int64(segment.majorVersion), 266 MinorVersion: int64(segment.minorVersion), 267 Metadata: segment.metadata, 268 } 269 for _, file := range segment.files { 270 fileInfo := &index.SegmentFileInfo{ 271 SegmentFileType: string(file.segmentFileType), 272 } 273 segmentInfo.Files = append(segmentInfo.Files, fileInfo) 274 } 275 info.Segments = append(info.Segments, segmentInfo) 276 } 277 return info.Marshal() 278 } 279 280 func (w *indexWriter) digestsFileData(infoFileData []byte) ([]byte, error) { 281 digests := &index.IndexDigests{ 282 InfoDigest: digest.Checksum(infoFileData), 283 } 284 for _, segment := range w.segments { 285 segmentDigest := &index.SegmentDigest{ 286 SegmentType: string(segment.segmentType), 287 } 288 for _, file := range segment.files { 289 fileDigest := &index.SegmentFileDigest{ 290 SegmentFileType: string(file.segmentFileType), 291 Digest: file.digest, 292 } 293 segmentDigest.Files = append(segmentDigest.Files, fileDigest) 294 } 295 digests.SegmentDigests = append(digests.SegmentDigests, segmentDigest) 296 } 297 return digests.Marshal() 298 } 299 300 func (w *indexWriter) Close() error { 301 if w.err != nil { 302 // If a write error occurred don't even bother trying to write out file set 303 return w.err 304 } 305 306 // Write info file 307 infoFileData, err := w.infoFileData() 308 if err != nil { 309 return err 310 } 311 312 if err := w.writeInfoFile(infoFileData); err != nil { 313 return err 314 } 315 316 // Write digests file 317 digestsFileData, err := w.digestsFileData(infoFileData) 318 if err != nil { 319 return err 320 } 321 err = ioutil.WriteFile(w.digestFilePath, digestsFileData, w.newFileMode) 322 if err != nil { 323 return err 324 } 325 326 // Write checkpoint file 327 digestBuffer := digest.NewBuffer() 328 digestBuffer.WriteDigest(digest.Checksum(digestsFileData)) 329 return ioutil.WriteFile(w.checkpointFilePath, digestBuffer, w.newFileMode) 330 } 331 332 func (w *indexWriter) writeInfoFile(infoFileData []byte) error { 333 // NB: corrupted index fileset cleanup logic depends on info files being written ahead of 334 // all the other files. To avoid cases where writes could be observed in a different order, 335 // info files are being fsync'ed immediately after being written. 336 return xos.WriteFileSync(w.infoFilePath, infoFileData, w.newFileMode) 337 }