github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/persist/fs/read_write_test.go

// Copyright (c) 2016 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package fs

import (
	"bytes"
	"errors"
	"os"
	"path/filepath"
	"sort"
	"testing"
	"time"

	"github.com/m3db/m3/src/dbnode/digest"
	"github.com/m3db/m3/src/dbnode/persist"
	"github.com/m3db/m3/src/dbnode/ts"
	"github.com/m3db/m3/src/x/checked"
	"github.com/m3db/m3/src/x/ident"
	xtime "github.com/m3db/m3/src/x/time"

	"github.com/m3db/bloom/v4"
	"github.com/pborman/uuid"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

var (
	testSnapshotID = uuid.Parse("bbc85a98-bd0c-47fe-8b9a-89cde1b4540f")
)

type testEntry struct {
	id   string
	tags map[string]string
	data []byte
}

func (e testEntry) ID() ident.ID {
	return ident.StringID(e.id)
}

func (e testEntry) Tags() ident.Tags {
	if e.tags == nil {
		return ident.Tags{}
	}

	// Return tags in sorted key order for deterministic results
	var keys []string
	for key := range e.tags {
		keys = append(keys, key)
	}
	sort.Strings(keys)

	var tags ident.Tags
	for _, key := range keys {
		tags.Append(ident.StringTag(key, e.tags[key]))
	}

	return tags
}

type testEntries []testEntry

func (e testEntries) Less(i, j int) bool {
	return e[i].id < e[j].id
}

func (e testEntries) Len() int {
	return len(e)
}

func (e testEntries) Swap(i, j int) {
	e[i], e[j] = e[j], e[i]
}

func newTestWriter(t *testing.T, filePathPrefix string) DataFileSetWriter {
	writer, err := NewWriter(testDefaultOpts.
		SetFilePathPrefix(filePathPrefix).
		SetWriterBufferSize(testWriterBufferSize))
	require.NoError(t, err)
	return writer
}

func writeTestData(
	t *testing.T,
	w DataFileSetWriter,
	shard uint32,
	timestamp xtime.UnixNano,
	entries []testEntry,
	fileSetType persist.FileSetType,
) {
	writeTestDataWithVolume(
		t, w, shard, timestamp, 0, entries, fileSetType)
}

func writeTestDataWithVolume(
	t *testing.T,
	w DataFileSetWriter,
	shard uint32,
	timestamp xtime.UnixNano,
	volume int,
	entries []testEntry,
	fileSetType persist.FileSetType,
) {
	writerOpts := DataWriterOpenOptions{
		Identifier: FileSetFileIdentifier{
			Namespace:   testNs1ID,
			Shard:       shard,
			BlockStart:  timestamp,
			VolumeIndex: volume,
		},
		BlockSize:   testBlockSize,
		FileSetType: fileSetType,
	}

	if fileSetType == persist.FileSetSnapshotType {
		writerOpts.Snapshot.SnapshotTime = timestamp
		writerOpts.Snapshot.SnapshotID = testSnapshotID
	}

	err := w.Open(writerOpts)
	assert.NoError(t, err)

	for i := range entries {
		metadata := persist.NewMetadataFromIDAndTags(entries[i].ID(),
			entries[i].Tags(),
			persist.MetadataOptions{})
		assert.NoError(t, w.Write(metadata,
			bytesRefd(entries[i].data),
			digest.Checksum(entries[i].data)))
	}
	assert.NoError(t, w.Close())

	// Assert that any index entries released the references they held
	writer, ok := w.(*writer)
	require.True(t, ok)

	// Take a ref to the wholly allocated index entries slice
	slice := writer.indexEntries[:cap(writer.indexEntries)]

	// Check that every entry's metadata was reset to the zero value
	for _, elem := range slice {
		assert.Equal(t, persist.Metadata{}, elem.metadata)
	}
}

type readTestType uint

const (
	readTestTypeData readTestType = iota
	readTestTypeMetadata
)

var readTestTypes = []readTestType{
	readTestTypeData,
	readTestTypeMetadata,
}

//nolint: unparam
func readTestData(t *testing.T, r DataFileSetReader, shard uint32,
	timestamp xtime.UnixNano, entries []testEntry) {
	readTestDataWithStreamingOpt(t, r, shard, timestamp, entries, false)

	sortedEntries := append(make(testEntries, 0, len(entries)), entries...)
	sort.Sort(sortedEntries)

	readTestDataWithStreamingOpt(t, r, shard, timestamp, sortedEntries, true)
}

// readTestDataWithStreamingOpt verifies that the data read back matches what
// was written. Note that it also exercises reuse of the reader: it first
// reads all the data, closes the reader, then reopens it and reads through
// again, reading only the metadata the second time. If this starts to fail
// during the metadata-only pass, it could be a newly introduced reader
// reuse bug.
func readTestDataWithStreamingOpt(
	t *testing.T,
	r DataFileSetReader,
	shard uint32,
	timestamp xtime.UnixNano,
	entries []testEntry,
	streamingEnabled bool,
) {
	for _, underTest := range readTestTypes {
		rOpenOpts := DataReaderOpenOptions{
			Identifier: FileSetFileIdentifier{
				Namespace:  testNs1ID,
				Shard:      shard,
				BlockStart: timestamp,
			},
			StreamingEnabled: streamingEnabled,
		}
		err := r.Open(rOpenOpts)
		require.NoError(t, err)

		require.Equal(t, len(entries), r.Entries())
		require.Equal(t, 0, r.EntriesRead())

		bloomFilter, err := r.ReadBloomFilter()
		assert.NoError(t, err)
		// Make sure the bloom filter doesn't always return true
		assert.False(t, bloomFilter.Test([]byte("some_random_data")))

		expectedEntries := uint(len(entries))
		if expectedEntries == 0 {
			expectedEntries = 1
		}
		expectedM, expectedK := bloom.EstimateFalsePositiveRate(
			expectedEntries, defaultIndexBloomFilterFalsePositivePercent)
		assert.Equal(t, expectedK, bloomFilter.K())
		// EstimateFalsePositiveRate always returns at least 1, so skip this
		// check if there are no entries
		if len(entries) > 0 {
			assert.Equal(t, expectedM, bloomFilter.M())
		}

		for i := 0; i < r.Entries(); i++ {
			switch underTest {
			case readTestTypeData:
				id, tags, data, checksum, err := readData(t, r)
				require.NoError(t, err)

				data.IncRef()

				// Assert id
				assert.Equal(t, entries[i].id, id.String())

				// Assert tags
				tagMatcher := ident.NewTagIterMatcher(ident.NewTagsIterator(entries[i].Tags()))
				assert.True(t, tagMatcher.Matches(tags))

				assert.True(t, bytes.Equal(entries[i].data, data.Bytes()))
				assert.Equal(t, digest.Checksum(entries[i].data), checksum)

				assert.Equal(t, i+1, r.EntriesRead())

				// Verify that the bloomFilter was bootstrapped properly by making sure it
				// at least contains every ID
				assert.True(t, bloomFilter.Test(id.Bytes()))

				id.Finalize()
				tags.Close()
				data.DecRef()
				data.Finalize()

			case readTestTypeMetadata:
				id, tags, length, checksum, err := readMetadata(t, r)
				require.NoError(t, err)

				// Assert id
				assert.Equal(t, entries[i].id, id.String())

				// Assert tags
				tagMatcher := ident.NewTagIterMatcher(ident.NewTagsIterator(entries[i].Tags()))
				assert.True(t, tagMatcher.Matches(tags))

				assert.Equal(t, digest.Checksum(entries[i].data), checksum)
				assert.Equal(t, len(entries[i].data), length)

				assert.Equal(t, i+1, r.MetadataRead())

				// Verify that the bloomFilter was bootstrapped properly by making sure it
				// at least contains every ID
				assert.True(t, bloomFilter.Test(id.Bytes()))

				id.Finalize()
				tags.Close()
			}
		}

		require.NoError(t, r.Close())
	}
}

func TestSimpleReadWrite(t *testing.T) {
	dir := createTempDir(t)
	filePathPrefix := filepath.Join(dir, "")
	defer os.RemoveAll(dir)

	entries := []testEntry{
		{"foo", nil, []byte{1, 2, 3}},
		{"bar", nil, []byte{4, 5, 6}},
		{"baz", nil, make([]byte, 65536)},
		{"cat", nil, make([]byte, 100000)},
		{"foo+bar=baz,qux=qaz", map[string]string{
			"bar": "baz",
			"qux": "qaz",
		}, []byte{7, 8, 9}},
	}

	w := newTestWriter(t, filePathPrefix)
	writeTestData(t, w, 0, testWriterStart, entries, persist.FileSetFlushType)

	r := newTestReader(t, filePathPrefix)
	readTestData(t, r, 0, testWriterStart, entries)
}

func TestCheckpointFileSizeBytesSize(t *testing.T) {
	// These values need to match so that the logic for determining whether
	// a checkpoint file is complete or not remains correct.
	require.Equal(t, digest.DigestLenBytes, CheckpointFileSizeBytes)
}

func TestDuplicateWrite(t *testing.T) {
	dir := createTempDir(t)
	filePathPrefix := filepath.Join(dir, "")
	defer os.RemoveAll(dir)

	entries := []testEntry{
		{"foo", nil, []byte{1, 2, 3}},
		{"foo", nil, []byte{4, 5, 6}},
	}

	w := newTestWriter(t, filePathPrefix)
	writerOpts := DataWriterOpenOptions{
		Identifier: FileSetFileIdentifier{
			Namespace:  testNs1ID,
			Shard:      0,
			BlockStart: testWriterStart,
		},
		BlockSize: testBlockSize,
	}
	err := w.Open(writerOpts)
	require.NoError(t, err)

	for i := range entries {
		metadata := persist.NewMetadataFromIDAndTags(entries[i].ID(),
			entries[i].Tags(),
			persist.MetadataOptions{})
		require.NoError(t, w.Write(metadata,
			bytesRefd(entries[i].data),
			digest.Checksum(entries[i].data)))
	}
	require.Equal(t, errors.New("encountered duplicate ID: foo"), w.Close())
}

func TestReadWithReusedReader(t *testing.T) {
	dir := createTempDir(t)
	filePathPrefix := filepath.Join(dir, "")
	defer os.RemoveAll(dir)

	entries := []testEntry{
		{"foo", nil, []byte{1, 2, 3}},
		{"bar", nil, []byte{4, 5, 6}},
		{"baz", nil, make([]byte, 65536)},
		{"cat", nil, make([]byte, 100000)},
		{"foo+bar=baz,qux=qaz", map[string]string{
			"bar": "baz",
			"qux": "qaz",
		}, []byte{7, 8, 9}},
	}

	w := newTestWriter(t, filePathPrefix)
	writeTestData(t, w, 0, testWriterStart, entries, persist.FileSetFlushType)

	r := newTestReader(t, filePathPrefix)
	readTestData(t, r, 0, testWriterStart, entries)
	// Reuse the reader to read again
	readTestData(t, r, 0, testWriterStart, entries)
}

func TestInfoReadWrite(t *testing.T) {
	dir := createTempDir(t)
	filePathPrefix := filepath.Join(dir, "")
	defer os.RemoveAll(dir)

	entries := []testEntry{
		{"foo", nil, []byte{1, 2, 3}},
		{"bar", nil, []byte{4, 5, 6}},
		{"baz", nil, make([]byte, 65536)},
		{"cat", nil, make([]byte, 100000)},
		{"foo+bar=baz,qux=qaz", map[string]string{
			"bar": "baz",
			"qux": "qaz",
		}, []byte{7, 8, 9}},
	}

	w := newTestWriter(t, filePathPrefix)
	writeTestData(t, w, 0, testWriterStart, entries, persist.FileSetFlushType)

	readInfoFileResults := ReadInfoFiles(filePathPrefix, testNs1ID, 0, 16, nil, persist.FileSetFlushType)
	require.Equal(t, 1, len(readInfoFileResults))
	for _, result := range readInfoFileResults {
		require.NoError(t, result.Err.Error())
	}

	infoFile := readInfoFileResults[0].Info
	require.Equal(t, int64(testWriterStart), infoFile.BlockStart)
	require.Equal(t, testBlockSize, time.Duration(infoFile.BlockSize))
	require.Equal(t, int64(len(entries)), infoFile.Entries)
}

func TestInfoReadWriteVolumeIndex(t *testing.T) {
	dir := createTempDir(t)
	filePathPrefix := filepath.Join(dir, "")
	defer os.RemoveAll(dir)

	var (
		entries = []testEntry{}
		w       = newTestWriter(t, filePathPrefix)
		volume  = 1
	)

	writeTestDataWithVolume(t, w, 0, testWriterStart, volume, entries, persist.FileSetFlushType)

	readInfoFileResults := ReadInfoFiles(filePathPrefix, testNs1ID, 0, 16, nil, persist.FileSetFlushType)
	require.Equal(t, 1, len(readInfoFileResults))
	for _, result := range readInfoFileResults {
		require.NoError(t, result.Err.Error())
	}

	infoFile := readInfoFileResults[0].Info
	require.Equal(t, testWriterStart, xtime.UnixNano(infoFile.BlockStart))
	require.Equal(t, volume, infoFile.VolumeIndex)
	require.Equal(t, testBlockSize, time.Duration(infoFile.BlockSize))
	require.Equal(t, int64(len(entries)), infoFile.Entries)
}

func TestInfoReadWriteSnapshot(t *testing.T) {
	dir := createTempDir(t)
	filePathPrefix := filepath.Join(dir, "")
	defer os.RemoveAll(dir)

	w := newTestWriter(t, filePathPrefix)
	writeTestData(t, w, 0, testWriterStart, nil, persist.FileSetSnapshotType)

	snapshotFiles, err := SnapshotFiles(filePathPrefix, testNs1ID, 0)
	require.NoError(t, err)

	require.Equal(t, 1, len(snapshotFiles))

	snapshot := snapshotFiles[0]
	snapshotTime, snapshotID, err := snapshot.SnapshotTimeAndID()
	require.NoError(t, err)
	require.True(t, testWriterStart.Equal(snapshotTime))
	require.Equal(t, testSnapshotID, snapshotID)
}

func TestReusingReaderWriter(t *testing.T) {
	dir := createTempDir(t)
	filePathPrefix := filepath.Join(dir, "")
	defer os.RemoveAll(dir)

	allEntries := [][]testEntry{
		{
			{"foo", nil, []byte{1, 2, 3}},
			{"bar", nil, []byte{4, 5, 6}},
		},
		{
			{"baz", nil, []byte{7, 8, 9}},
		},
		{},
	}
	w := newTestWriter(t, filePathPrefix)
	for i := range allEntries {
		writeTestData(
			t, w, 0, testWriterStart.Add(time.Duration(i)*time.Hour), allEntries[i], persist.FileSetFlushType)
	}

	r := newTestReader(t, filePathPrefix)
	for i := range allEntries {
		readTestData(t, r, 0, testWriterStart.Add(time.Duration(i)*time.Hour), allEntries[i])
	}
}

func TestReusingWriterAfterWriteError(t *testing.T) {
	dir := createTempDir(t)
	filePathPrefix := filepath.Join(dir, "")
	defer os.RemoveAll(dir)

	entries := []testEntry{
		{"foo", nil, []byte{1, 2, 3}},
		{"bar", nil, []byte{4, 5, 6}},
	}
	w := newTestWriter(t, filePathPrefix)
	shard := uint32(0)
	writerOpts := DataWriterOpenOptions{
		Identifier: FileSetFileIdentifier{
			Namespace:  testNs1ID,
			Shard:      shard,
			BlockStart: testWriterStart,
		},
	}
	metadata := persist.NewMetadataFromIDAndTags(entries[0].ID(),
		entries[0].Tags(),
		persist.MetadataOptions{})
	require.NoError(t, w.Open(writerOpts))

	require.NoError(t, w.Write(metadata,
		bytesRefd(entries[0].data),
		digest.Checksum(entries[0].data)))

	// Intentionally force a writer error.
	w.(*writer).err = errors.New("foo")
	metadata = persist.NewMetadataFromIDAndTags(entries[1].ID(),
		entries[1].Tags(),
		persist.MetadataOptions{})
	require.Equal(t, "foo", w.Write(metadata,
		bytesRefd(entries[1].data),
		digest.Checksum(entries[1].data)).Error())
	w.Close()

	r := newTestReader(t, filePathPrefix)
	rOpenOpts := DataReaderOpenOptions{
		Identifier: FileSetFileIdentifier{
			Namespace:  testNs1ID,
			Shard:      shard,
			BlockStart: testWriterStart,
		},
	}
	require.Equal(t, ErrCheckpointFileNotFound, r.Open(rOpenOpts))

	// Now reuse the writer and validate that the data is written as expected.
	writeTestData(t, w, shard, testWriterStart, entries, persist.FileSetFlushType)
	readTestData(t, r, shard, testWriterStart, entries)
}

func TestWriterOnlyWritesNonNilBytes(t *testing.T) {
	dir := createTempDir(t)
	filePathPrefix := filepath.Join(dir, "")
	defer os.RemoveAll(dir)

	w := newTestWriter(t, filePathPrefix)
	writerOpts := DataWriterOpenOptions{
		BlockSize: testBlockSize,
		Identifier: FileSetFileIdentifier{
			Namespace:  testNs1ID,
			Shard:      0,
			BlockStart: testWriterStart,
		},
	}
	metadata := persist.NewMetadataFromIDAndTags(
		ident.StringID("foo"),
		ident.Tags{},
		persist.MetadataOptions{})
	require.NoError(t, w.Open(writerOpts))

	err := w.WriteAll(metadata,
		[]checked.Bytes{
			checkedBytes([]byte{1, 2, 3}),
			nil,
			checkedBytes([]byte{4, 5, 6}),
		},
		digest.Checksum([]byte{1, 2, 3, 4, 5, 6}))
	require.NoError(t, err)

	assert.NoError(t, w.Close())

	r := newTestReader(t, filePathPrefix)
	readTestData(t, r, 0, testWriterStart, []testEntry{
		{"foo", nil, []byte{1, 2, 3, 4, 5, 6}},
	})
}

// readData reads the next entry from the reader, using the streaming API
// when the reader was opened with streaming enabled.
func readData(
	t *testing.T,
	reader DataFileSetReader,
) (id ident.ID, tags ident.TagIterator, data checked.Bytes, checksum uint32, err error) {
	if reader.StreamingEnabled() {
		entry, err := reader.StreamingRead()
		if err != nil {
			return nil, nil, nil, 0, err
		}
		tags := decodeTags(t, entry.EncodedTags)
		return entry.ID, tags, checked.NewBytes(entry.Data, nil), entry.DataChecksum, err
	}

	return reader.Read()
}

// readMetadata reads the metadata of the next entry from the reader, using
// the streaming API when the reader was opened with streaming enabled.
func readMetadata(
	t *testing.T,
	reader DataFileSetReader,
) (id ident.ID, tags ident.TagIterator, length int, checksum uint32, err error) {
	if reader.StreamingEnabled() {
		entry, err := reader.StreamingReadMetadata()
		tags := decodeTags(t, entry.EncodedTags)
		return entry.ID, tags, entry.Length, entry.DataChecksum, err
	}

	return reader.ReadMetadata()
}

// decodeTags decodes encoded tags into a tag iterator, returning an empty
// iterator when there are no encoded tags.
func decodeTags(t *testing.T, encodedTags ts.EncodedTags) ident.TagIterator {
	tags := ident.EmptyTagIterator
	if len(encodedTags) > 0 {
		tagsDecoder := testTagDecoderPool.Get()
		tagsDecoder.Reset(checkedBytes(encodedTags))
		require.NoError(t, tagsDecoder.Err())
		tags = tagsDecoder
	}

	return tags
}

// checkedBytes wraps b in a checked.Bytes with a single reference taken.
func checkedBytes(b []byte) checked.Bytes {
	r := checked.NewBytes(b, nil)
	r.IncRef()
	return r
}