// Source: github.com/m3db/m3@v1.5.0/src/dbnode/persist/fs/index_read.go
//
// Copyright (c) 2018 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

20 21 package fs 22 23 import ( 24 "bytes" 25 "fmt" 26 "io" 27 "io/ioutil" 28 "os" 29 30 "github.com/m3db/m3/src/dbnode/digest" 31 "github.com/m3db/m3/src/dbnode/generated/proto/index" 32 "github.com/m3db/m3/src/dbnode/persist" 33 idxpersist "github.com/m3db/m3/src/m3ninx/persist" 34 "github.com/m3db/m3/src/x/mmap" 35 xtime "github.com/m3db/m3/src/x/time" 36 37 "go.uber.org/zap" 38 ) 39 40 const ( 41 mmapPersistFsIndexName = "mmap.persist.fs.index" 42 ) 43 44 type indexReader struct { 45 opts Options 46 filePathPrefix string 47 hugePagesOpts mmap.HugeTLBOptions 48 logger *zap.Logger 49 50 namespaceDir string 51 start xtime.UnixNano 52 fileSetType persist.FileSetType 53 volumeIndex int 54 55 currIdx int 56 info index.IndexVolumeInfo 57 expectedDigest index.IndexDigests 58 expectedDigestOfDigest uint32 59 readDigests indexReaderReadDigests 60 } 61 62 type indexReaderReadDigests struct { 63 infoFileDigest uint32 64 digestsFileDigest uint32 65 segments []indexReaderReadSegmentDigests 66 } 67 68 type indexReaderReadSegmentDigests struct { 69 segmentType idxpersist.IndexSegmentType 70 files []indexReaderReadSegmentFileDigest 71 } 72 73 type indexReaderReadSegmentFileDigest struct { 74 segmentFileType idxpersist.IndexSegmentFileType 75 digest uint32 76 } 77 78 // NewIndexReader returns a new index reader with options. 
79 func NewIndexReader(opts Options) (IndexFileSetReader, error) { 80 if err := opts.Validate(); err != nil { 81 return nil, err 82 } 83 r := new(indexReader) 84 r.reset(opts) 85 return r, nil 86 } 87 88 func (r *indexReader) reset(opts Options) { 89 *r = indexReader{} 90 r.opts = opts 91 r.filePathPrefix = opts.FilePathPrefix() 92 r.hugePagesOpts = mmap.HugeTLBOptions{ 93 Enabled: opts.MmapEnableHugeTLB(), 94 Threshold: opts.MmapHugeTLBThreshold(), 95 } 96 r.logger = opts.InstrumentOptions().Logger() 97 } 98 99 func (r *indexReader) Open( 100 opts IndexReaderOpenOptions, 101 ) (IndexReaderOpenResult, error) { 102 var result IndexReaderOpenResult 103 104 // NB(r): so the reader can be reused. 105 r.reset(r.opts) 106 107 var ( 108 namespace = opts.Identifier.Namespace 109 checkpointFilepath string 110 infoFilepath string 111 digestFilepath string 112 ) 113 r.start = opts.Identifier.BlockStart 114 r.fileSetType = opts.FileSetType 115 r.volumeIndex = opts.Identifier.VolumeIndex 116 switch opts.FileSetType { 117 case persist.FileSetSnapshotType: 118 r.namespaceDir = NamespaceIndexSnapshotDirPath(r.filePathPrefix, namespace) 119 case persist.FileSetFlushType: 120 r.namespaceDir = NamespaceIndexDataDirPath(r.filePathPrefix, namespace) 121 default: 122 return result, fmt.Errorf("cannot open index reader for fileset type: %s", opts.FileSetType) 123 } 124 checkpointFilepath = FilesetPathFromTimeAndIndex(r.namespaceDir, r.start, r.volumeIndex, CheckpointFileSuffix) 125 infoFilepath = FilesetPathFromTimeAndIndex(r.namespaceDir, r.start, r.volumeIndex, InfoFileSuffix) 126 digestFilepath = FilesetPathFromTimeAndIndex(r.namespaceDir, r.start, r.volumeIndex, DigestFileSuffix) 127 128 // If there is no checkpoint file, don't read the index files. 
129 if err := r.readCheckpointFile(checkpointFilepath); err != nil { 130 return result, err 131 } 132 if err := r.readDigestsFile(digestFilepath); err != nil { 133 return result, err 134 } 135 if err := r.readInfoFile(infoFilepath); err != nil { 136 return result, err 137 } 138 result.Shards = make(map[uint32]struct{}, len(r.info.Shards)) 139 for _, shard := range r.info.Shards { 140 result.Shards[shard] = struct{}{} 141 } 142 return result, nil 143 } 144 145 func (r *indexReader) readCheckpointFile(filePath string) error { 146 exists, err := CompleteCheckpointFileExists(filePath) 147 if err != nil { 148 return err 149 } 150 if !exists { 151 return ErrCheckpointFileNotFound 152 } 153 data, err := ioutil.ReadFile(filePath) 154 if err != nil { 155 return err 156 } 157 r.expectedDigestOfDigest = digest.Buffer(data).ReadDigest() 158 return nil 159 } 160 161 func (r *indexReader) readDigestsFile(filePath string) error { 162 data, err := ioutil.ReadFile(filePath) 163 if err != nil { 164 return err 165 } 166 r.readDigests.digestsFileDigest = digest.Checksum(data) 167 if err := r.validateDigestsFileDigest(); err != nil { 168 return err 169 } 170 return r.expectedDigest.Unmarshal(data) 171 } 172 173 func (r *indexReader) readInfoFile(filePath string) error { 174 data, err := ioutil.ReadFile(filePath) 175 if err != nil { 176 return err 177 } 178 r.readDigests.infoFileDigest = digest.Checksum(data) 179 if r.readDigests.infoFileDigest != r.expectedDigest.InfoDigest { 180 return fmt.Errorf("read info file checksum bad: expected=%d, actual=%d", 181 r.expectedDigest.InfoDigest, r.readDigests.infoFileDigest) 182 } 183 return r.info.Unmarshal(data) 184 } 185 186 func (r *indexReader) SegmentFileSets() int { 187 return len(r.info.Segments) 188 } 189 190 func (r *indexReader) ReadSegmentFileSet() ( 191 idxpersist.IndexSegmentFileSet, 192 error, 193 ) { 194 if r.currIdx >= len(r.info.Segments) { 195 return nil, io.EOF 196 } 197 198 var ( 199 segment = r.info.Segments[r.currIdx] 200 
result = readableIndexSegmentFileSet{ 201 info: segment, 202 files: make([]idxpersist.IndexSegmentFile, 0, len(segment.Files)), 203 } 204 digests = indexReaderReadSegmentDigests{ 205 segmentType: idxpersist.IndexSegmentType(segment.SegmentType), 206 } 207 ) 208 success := false 209 defer func() { 210 // Do not close opened files if read finishes successfully. 211 if success { 212 return 213 } 214 for _, file := range result.files { 215 file.Close() 216 } 217 }() 218 for _, file := range segment.Files { 219 segFileType := idxpersist.IndexSegmentFileType(file.SegmentFileType) 220 221 var filePath string 222 switch r.fileSetType { 223 case persist.FileSetSnapshotType: 224 filePath = snapshotIndexSegmentFilePathFromTimeAndIndex(r.namespaceDir, r.start, r.volumeIndex, 225 r.currIdx, segFileType) 226 case persist.FileSetFlushType: 227 filePath = filesetIndexSegmentFilePathFromTime(r.namespaceDir, r.start, r.volumeIndex, 228 r.currIdx, segFileType) 229 default: 230 return nil, fmt.Errorf("unknown fileset type: %s", r.fileSetType) 231 } 232 233 var ( 234 fd *os.File 235 desc mmap.Descriptor 236 ) 237 mmapResult, err := mmap.Files(os.Open, map[string]mmap.FileDesc{ 238 filePath: { 239 File: &fd, 240 Descriptor: &desc, 241 Options: mmap.Options{ 242 Read: true, 243 HugeTLB: r.hugePagesOpts, 244 ReporterOptions: mmap.ReporterOptions{ 245 Context: mmap.Context{ 246 Name: mmapPersistFsIndexName, 247 }, 248 Reporter: r.opts.MmapReporter(), 249 }, 250 }, 251 }, 252 }) 253 if err != nil { 254 return nil, err 255 } 256 if warning := mmapResult.Warning; warning != nil { 257 r.logger.Warn("warning while mmapping files in reader", zap.Error(warning)) 258 } 259 260 file := newReadableIndexSegmentFileMmap(segFileType, fd, desc) 261 result.files = append(result.files, file) 262 263 if r.opts.IndexReaderAutovalidateIndexSegments() { 264 // Only checksum the file if we are autovalidating the index 265 // segments on open. 
266 digests.files = append(digests.files, indexReaderReadSegmentFileDigest{ 267 segmentFileType: segFileType, 268 digest: digest.Checksum(desc.Bytes), 269 }) 270 } 271 272 // NB(bodu): Free mmaped bytes after we take the checksum so we don't 273 // get memory spikes at bootstrap time. 274 if err := mmap.MadviseDontNeed(desc); err != nil { 275 return nil, err 276 } 277 } 278 279 r.currIdx++ 280 r.readDigests.segments = append(r.readDigests.segments, digests) 281 success = true 282 return result, nil 283 } 284 285 func (r *indexReader) Validate() error { 286 if err := r.validateDigestsFileDigest(); err != nil { 287 return err 288 } 289 if err := r.validateInfoFileDigest(); err != nil { 290 return err 291 } 292 if !r.opts.IndexReaderAutovalidateIndexSegments() { 293 // Do not validate on segment open. 294 return nil 295 } 296 for i, segment := range r.info.Segments { 297 for j := range segment.Files { 298 if err := r.validateSegmentFileDigest(i, j); err != nil { 299 return err 300 } 301 } 302 } 303 return nil 304 } 305 306 func (r *indexReader) validateDigestsFileDigest() error { 307 if r.readDigests.digestsFileDigest != r.expectedDigestOfDigest { 308 return fmt.Errorf("read digests file checksum bad: expected=%d, actual=%d", 309 r.expectedDigestOfDigest, r.readDigests.digestsFileDigest) 310 } 311 return nil 312 } 313 314 func (r *indexReader) validateInfoFileDigest() error { 315 if r.readDigests.infoFileDigest != r.expectedDigest.InfoDigest { 316 return fmt.Errorf("read info file checksum bad: expected=%d, actual=%d", 317 r.expectedDigest.InfoDigest, r.readDigests.infoFileDigest) 318 } 319 return nil 320 } 321 322 func (r *indexReader) validateSegmentFileDigest(segmentIdx, fileIdx int) error { 323 if segmentIdx >= len(r.readDigests.segments) { 324 return fmt.Errorf( 325 "have not read correct number of segments to validate segment %d checksums: "+ 326 "need=%d, actual=%d", 327 segmentIdx, segmentIdx+1, len(r.readDigests.segments)) 328 } 329 if segmentIdx >= 
len(r.expectedDigest.SegmentDigests) { 330 return fmt.Errorf( 331 "have not read digest files correctly to validate segment %d checksums: "+ 332 "need=%d, actual=%d", 333 segmentIdx, segmentIdx+1, len(r.expectedDigest.SegmentDigests)) 334 } 335 336 if fileIdx >= len(r.readDigests.segments[segmentIdx].files) { 337 return fmt.Errorf( 338 "have not read correct number of segment files to validate segment %d checksums: "+ 339 "need=%d, actual=%d", 340 segmentIdx, fileIdx+1, len(r.readDigests.segments[segmentIdx].files)) 341 } 342 if fileIdx >= len(r.expectedDigest.SegmentDigests[segmentIdx].Files) { 343 return fmt.Errorf( 344 "have not read correct number of segment files to validate segment %d checksums: "+ 345 "need=%d, actual=%d", 346 segmentIdx, fileIdx+1, len(r.expectedDigest.SegmentDigests[segmentIdx].Files)) 347 } 348 349 expected := r.expectedDigest.SegmentDigests[segmentIdx].Files[fileIdx].Digest 350 actual := r.readDigests.segments[segmentIdx].files[fileIdx].digest 351 if actual != expected { 352 return fmt.Errorf("read segment file %d for segment %d checksum bad: expected=%d, actual=%d", 353 segmentIdx, fileIdx, expected, actual) 354 } 355 return nil 356 } 357 358 func (r *indexReader) IndexVolumeType() idxpersist.IndexVolumeType { 359 if r.info.IndexVolumeType == nil { 360 return idxpersist.DefaultIndexVolumeType 361 } 362 return idxpersist.IndexVolumeType(r.info.IndexVolumeType.Value) 363 } 364 365 func (r *indexReader) Close() error { 366 r.reset(r.opts) 367 return nil 368 } 369 370 // NB(r): to force the type to compile to match interface IndexSegmentFileSet 371 var _ IndexSegmentFileSet = readableIndexSegmentFileSet{} 372 373 type readableIndexSegmentFileSet struct { 374 info *index.SegmentInfo 375 files []idxpersist.IndexSegmentFile 376 } 377 378 func (s readableIndexSegmentFileSet) SegmentType() idxpersist.IndexSegmentType { 379 return idxpersist.IndexSegmentType(s.info.SegmentType) 380 } 381 382 func (s readableIndexSegmentFileSet) MajorVersion() int 
{ 383 return int(s.info.MajorVersion) 384 } 385 386 func (s readableIndexSegmentFileSet) MinorVersion() int { 387 return int(s.info.MinorVersion) 388 } 389 390 func (s readableIndexSegmentFileSet) SegmentMetadata() []byte { 391 return s.info.Metadata 392 } 393 394 func (s readableIndexSegmentFileSet) Files() []idxpersist.IndexSegmentFile { 395 return s.files 396 } 397 398 type readableIndexSegmentFileMmap struct { 399 fileType idxpersist.IndexSegmentFileType 400 fd *os.File 401 bytesMmap mmap.Descriptor 402 reader bytes.Reader 403 } 404 405 func newReadableIndexSegmentFileMmap( 406 fileType idxpersist.IndexSegmentFileType, 407 fd *os.File, 408 bytesMmap mmap.Descriptor, 409 ) idxpersist.IndexSegmentFile { 410 r := &readableIndexSegmentFileMmap{ 411 fileType: fileType, 412 fd: fd, 413 bytesMmap: bytesMmap, 414 } 415 r.reader.Reset(r.bytesMmap.Bytes) 416 return r 417 } 418 419 func (f *readableIndexSegmentFileMmap) SegmentFileType() idxpersist.IndexSegmentFileType { 420 return f.fileType 421 } 422 423 func (f *readableIndexSegmentFileMmap) Mmap() (mmap.Descriptor, error) { 424 return f.bytesMmap, nil 425 } 426 427 func (f *readableIndexSegmentFileMmap) Read(b []byte) (int, error) { 428 return f.reader.Read(b) 429 } 430 431 func (f *readableIndexSegmentFileMmap) Close() error { 432 // Be sure to close the mmap before the file 433 if f.bytesMmap.Bytes != nil { 434 if err := mmap.Munmap(f.bytesMmap); err != nil { 435 return err 436 } 437 f.bytesMmap = mmap.Descriptor{} 438 } 439 if f.fd != nil { 440 if err := f.fd.Close(); err != nil { 441 return err 442 } 443 f.fd = nil 444 } 445 f.reader.Reset(nil) 446 return nil 447 }