github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/integration/disk_flush_helpers.go

// +build integration

// Copyright (c) 2017 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package integration

import (
	"errors"
	"fmt"
	"testing"
	"time"

	"github.com/m3db/m3/src/dbnode/encoding"
	"github.com/m3db/m3/src/dbnode/integration/generate"
	ns "github.com/m3db/m3/src/dbnode/namespace"
	"github.com/m3db/m3/src/dbnode/persist"
	"github.com/m3db/m3/src/dbnode/persist/fs"
	"github.com/m3db/m3/src/dbnode/sharding"
	"github.com/m3db/m3/src/dbnode/storage"
	"github.com/m3db/m3/src/dbnode/x/xio"
	"github.com/m3db/m3/src/x/ident"
	"github.com/m3db/m3/src/x/ident/testutil"
	xtime "github.com/m3db/m3/src/x/time"

	"github.com/pborman/uuid"
	"github.com/stretchr/testify/require"
)

var (
	errDiskFlushTimedOut = errors.New("flushing data to disk took too long")
)

type snapshotID struct {
	blockStart xtime.UnixNano
	minVolume  int
}

func getLatestSnapshotVolumeIndex(
	fsOpts fs.Options,
	shardSet sharding.ShardSet,
	namespace ident.ID,
	blockStart xtime.UnixNano,
) int {
	latestVolumeIndex := -1

	for _, shard := range shardSet.AllIDs() {
		snapshotFiles, err := fs.SnapshotFiles(
			fsOpts.FilePathPrefix(), namespace, shard)
		if err != nil {
			panic(err)
		}
		latestSnapshot, ok := snapshotFiles.LatestVolumeForBlock(blockStart)
		if !ok {
			continue
		}
		if latestSnapshot.ID.VolumeIndex > latestVolumeIndex {
			latestVolumeIndex = latestSnapshot.ID.VolumeIndex
		}
	}

	return latestVolumeIndex
}

func waitUntilSnapshotFilesFlushed(
	fsOpts fs.Options,
	shardSet sharding.ShardSet,
	namespace ident.ID,
	expectedSnapshots []snapshotID,
	timeout time.Duration,
) (uuid.UUID, error) {
	var snapshotID uuid.UUID
	dataFlushed := func() bool {
		// NB(bodu): We want to ensure that we have snapshot data that is consistent across
		// ALL shards on a per block start basis. For each snapshot block start, we expect
		// the data to exist in at least one shard.
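		// A snapshot counts as "seen" once any shard has a snapshot volume for its
		// block start at or above the expected minimum volume index.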
		expectedSnapshotsSeen := make([]bool, len(expectedSnapshots))
		for _, shard := range shardSet.AllIDs() {
			for i, e := range expectedSnapshots {
				snapshotFiles, err := fs.SnapshotFiles(
					fsOpts.FilePathPrefix(), namespace, shard)
				if err != nil {
					panic(err)
				}

				latest, ok := snapshotFiles.LatestVolumeForBlock(e.blockStart)
				if !ok {
					// Each shard may not own data for all block starts.
					continue
				}

				if latest.ID.VolumeIndex < e.minVolume {
					// Cleanup manager can lag behind.
					continue
				}

				// Mark expected snapshot as seen.
				expectedSnapshotsSeen[i] = true
			}
		}
		// We should have seen each expected snapshot in at least one shard.
		for _, maybeSeen := range expectedSnapshotsSeen {
			if !maybeSeen {
				return false
			}
		}
		return true
	}
	if waitUntil(dataFlushed, timeout) {
		// Use snapshot metadata to get the latest snapshotID, as the view of snapshotID
		// can be inconsistent across TSDB blocks.
		snapshotMetadataFlushed := func() bool {
			snapshotMetadatas, _, err := fs.SortedSnapshotMetadataFiles(fsOpts)
			if err != nil {
				panic(err)
			}

			if len(snapshotMetadatas) == 0 {
				return false
			}
			snapshotID = snapshotMetadatas[len(snapshotMetadatas)-1].ID.UUID
			return true
		}
		if waitUntil(snapshotMetadataFlushed, timeout) {
			return snapshotID, nil
		}
	}

	return snapshotID, errDiskFlushTimedOut
}

func waitUntilDataFilesFlushed(
	filePathPrefix string,
	shardSet sharding.ShardSet,
	namespace ident.ID,
	testData map[xtime.UnixNano]generate.SeriesBlock,
	timeout time.Duration,
) error {
	dataFlushed := func() bool {
		for timestamp, seriesList := range testData {
			for _, series := range seriesList {
				shard := shardSet.Lookup(series.ID)
				exists, err := fs.DataFileSetExists(
					filePathPrefix, namespace, shard, timestamp, 0)
				if err != nil {
					panic(err)
				}

				if !exists {
					return false
				}
			}
		}
		return true
	}
	if waitUntil(dataFlushed, timeout) {
		return nil
	}
	return errDiskFlushTimedOut
}

func waitUntilFileSetFilesExist(
	filePathPrefix string,
	files []fs.FileSetFileIdentifier,
	timeout time.Duration,
) error {
	return waitUntilFileSetFilesExistOrNot(filePathPrefix, files, true, timeout)
}

func waitUntilFileSetFilesExistOrNot(
	filePathPrefix string,
	files []fs.FileSetFileIdentifier,
	// Either wait for all files to exist or for all files to not exist.
	checkForExistence bool,
	timeout time.Duration,
) error {
	dataFlushed := func() bool {
		for _, file := range files {
			exists, err := fs.DataFileSetExists(
				filePathPrefix, file.Namespace, file.Shard, file.BlockStart, file.VolumeIndex)
			if err != nil {
				panic(err)
			}

			if checkForExistence && !exists {
				return false
			}

			if !checkForExistence && exists {
				return false
			}
		}
		return true
	}
	if waitUntil(dataFlushed, timeout) {
		return nil
	}
	return errDiskFlushTimedOut
}

func verifyForTime(
	t *testing.T,
	storageOpts storage.Options,
	reader fs.DataFileSetReader,
	shardSet sharding.ShardSet,
	iteratorPool encoding.ReaderIteratorPool,
	timestamp xtime.UnixNano,
	nsCtx ns.Context,
	filesetType persist.FileSetType,
	expected generate.SeriesBlock,
) {
	err := checkForTime(
		storageOpts, reader, shardSet, iteratorPool, timestamp,
		nsCtx, filesetType, expected)
	require.NoError(t, err)
}

func checkForTime(
	storageOpts storage.Options,
	reader fs.DataFileSetReader,
	shardSet sharding.ShardSet,
	iteratorPool encoding.ReaderIteratorPool,
	timestamp xtime.UnixNano,
	nsCtx ns.Context,
	filesetType persist.FileSetType,
	expected generate.SeriesBlock,
) error {
	shards := make(map[uint32]struct{})
	for _, series := range expected {
		shard := shardSet.Lookup(series.ID)
		shards[shard] = struct{}{}
	}
	actual := make(generate.SeriesBlock, 0, len(expected))
	for shard := range shards {
		rOpts := fs.DataReaderOpenOptions{
			Identifier: fs.FileSetFileIdentifier{
				Namespace:  nsCtx.ID,
				Shard:      shard,
				BlockStart: timestamp,
			},
			FileSetType: filesetType,
		}

		filePathPrefix := storageOpts.CommitLogOptions().FilesystemOptions().FilePathPrefix()
		switch filesetType {
		// Identify the latest volume for this block start.
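		// Both snapshot and flush filesets are versioned by a volume index, so the
		// reader must be opened against the highest volume written for this block start.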
		case persist.FileSetSnapshotType:
			snapshotFiles, err := fs.SnapshotFiles(filePathPrefix, nsCtx.ID, shard)
			if err != nil {
				return err
			}
			latest, ok := snapshotFiles.LatestVolumeForBlock(timestamp)
			if !ok {
				return fmt.Errorf("no latest snapshot volume for block: %v", timestamp)
			}
			rOpts.Identifier.VolumeIndex = latest.ID.VolumeIndex
		case persist.FileSetFlushType:
			dataFiles, err := fs.DataFiles(filePathPrefix, nsCtx.ID, shard)
			if err != nil {
				return err
			}
			latest, ok := dataFiles.LatestVolumeForBlock(timestamp)
			if !ok {
				return fmt.Errorf("no latest data volume for block: %v", timestamp)
			}
			rOpts.Identifier.VolumeIndex = latest.ID.VolumeIndex
		}
		if err := reader.Open(rOpts); err != nil {
			return err
		}

		for i := 0; i < reader.Entries(); i++ {
			id, tagsIter, data, _, err := reader.Read()
			if err != nil {
				return err
			}

			tags, err := testutil.NewTagsFromTagIterator(tagsIter)
			if err != nil {
				return err
			}

			data.IncRef()

			var datapoints []generate.TestValue
			it := iteratorPool.Get()
			it.Reset(xio.NewBytesReader64(data.Bytes()), nsCtx.Schema)
			for it.Next() {
				dp, _, ann := it.Current()
				datapoints = append(datapoints, generate.TestValue{Datapoint: dp, Annotation: ann})
			}
			if err := it.Err(); err != nil {
				return err
			}
			it.Close()

			actual = append(actual, generate.Series{
				ID:   id,
				Tags: tags,
				Data: datapoints,
			})

			data.DecRef()
			data.Finalize()
		}
		if err := reader.Close(); err != nil {
			return err
		}
	}

	return compareSeriesList(expected, actual)
}

func verifyFlushedDataFiles(
	t *testing.T,
	shardSet sharding.ShardSet,
	storageOpts storage.Options,
	nsID ident.ID,
	seriesMaps map[xtime.UnixNano]generate.SeriesBlock,
) {
	err := checkFlushedDataFiles(shardSet, storageOpts, nsID, seriesMaps)
	require.NoError(t, err)
}

func checkFlushedDataFiles(
	shardSet sharding.ShardSet,
	storageOpts storage.Options,
	nsID ident.ID,
	seriesMaps map[xtime.UnixNano]generate.SeriesBlock,
) error {
	fsOpts := storageOpts.CommitLogOptions().FilesystemOptions()
	reader, err := fs.NewReader(storageOpts.BytesPool(), fsOpts)
	if err != nil {
		return err
	}
	iteratorPool := storageOpts.ReaderIteratorPool()
	nsCtx := ns.NewContextFor(nsID, storageOpts.SchemaRegistry())
	for timestamp, seriesList := range seriesMaps {
		err := checkForTime(
			storageOpts, reader, shardSet, iteratorPool, timestamp,
			nsCtx, persist.FileSetFlushType, seriesList)
		if err != nil {
			return err
		}
	}

	return nil
}

func verifySnapshottedDataFiles(
	t *testing.T,
	shardSet sharding.ShardSet,
	storageOpts storage.Options,
	nsID ident.ID,
	seriesMaps map[xtime.UnixNano]generate.SeriesBlock,
) {
	fsOpts := storageOpts.CommitLogOptions().FilesystemOptions()
	reader, err := fs.NewReader(storageOpts.BytesPool(), fsOpts)
	require.NoError(t, err)
	iteratorPool := storageOpts.ReaderIteratorPool()
	nsCtx := ns.NewContextFor(nsID, storageOpts.SchemaRegistry())
	for blockStart, seriesList := range seriesMaps {
		verifyForTime(
			t, storageOpts, reader, shardSet, iteratorPool, blockStart,
			nsCtx, persist.FileSetSnapshotType, seriesList)
	}
}
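
// Example usage (illustrative sketch): a flush integration test typically writes
// data through its test setup, waits for the filesets to reach disk, and then
// verifies them with the helpers above. The testSetup accessors and the nsID and
// seriesMaps values referenced below are assumed from the surrounding test and
// are not defined in this file:
//
//	require.NoError(t, waitUntilDataFilesFlushed(
//		testSetup.FilePathPrefix(), testSetup.ShardSet(),
//		nsID, seriesMaps, time.Minute))
//	verifyFlushedDataFiles(t, testSetup.ShardSet(),
//		testSetup.StorageOpts(), nsID, seriesMaps)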