github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/nbs/journal_writer_test.go (about) 1 // Copyright 2022 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package nbs 16 17 import ( 18 "context" 19 "encoding/base32" 20 "math/rand" 21 "os" 22 "path/filepath" 23 "testing" 24 25 "github.com/stretchr/testify/assert" 26 "github.com/stretchr/testify/require" 27 28 "github.com/dolthub/dolt/go/store/chunks" 29 "github.com/dolthub/dolt/go/store/hash" 30 ) 31 32 func TestJournalWriterReadWrite(t *testing.T) { 33 type opKind byte 34 35 type operation struct { 36 kind opKind 37 buf []byte 38 readAt int64 39 } 40 41 const ( 42 readOp opKind = iota 43 writeOp 44 flushOp 45 ) 46 47 tests := []struct { 48 name string 49 size int 50 ops []operation 51 }{ 52 { 53 name: "smoke test", 54 size: 16, 55 }, 56 { 57 name: "write to empty file", 58 size: 16, 59 ops: []operation{ 60 {kind: writeOp, buf: []byte("lorem")}, 61 {kind: writeOp, buf: []byte("ipsum")}, 62 }, 63 }, 64 { 65 name: "read from non-empty file", 66 size: 16, 67 ops: []operation{ 68 {kind: writeOp, buf: []byte("loremipsum")}, 69 {kind: flushOp}, 70 {kind: readOp, buf: []byte("lorem"), readAt: 0}, 71 {kind: readOp, buf: []byte("ipsum"), readAt: 5}, 72 {kind: readOp, buf: []byte("loremipsum"), readAt: 0}, 73 }, 74 }, 75 { 76 name: "read new writes", 77 size: 16, 78 ops: []operation{ 79 {kind: writeOp, buf: []byte("lorem")}, 80 {kind: readOp, buf: []byte("lorem"), readAt: 0}, 81 {kind: writeOp, buf: []byte("ipsum")}, 82 {kind: readOp, buf: []byte("lorem"), readAt: 0}, 83 {kind: readOp, buf: []byte("ipsum"), readAt: 5}, 84 }, 85 }, 86 { 87 name: "read flushed writes", 88 size: 16, 89 ops: []operation{ 90 {kind: writeOp, buf: []byte("lorem")}, 91 {kind: flushOp}, 92 {kind: readOp, buf: []byte("lorem"), readAt: 0}, 93 {kind: writeOp, buf: []byte("ipsum")}, 94 {kind: readOp, buf: []byte("ipsum"), readAt: 5}, 95 {kind: readOp, buf: []byte("lorem"), readAt: 0}, 96 {kind: flushOp}, 97 }, 98 }, 99 { 100 name: "read partially flushed writes", 101 size: 16, 102 ops: []operation{ 103 {kind: writeOp, buf: []byte("lorem")}, 104 {kind: flushOp}, 105 {kind: writeOp, buf: []byte("ipsum")}, 106 {kind: readOp, buf: []byte("loremipsum"), readAt: 0}, 107 }, 108 }, 109 { 110 name: "successive writes trigger buffer flush ", 111 size: 16, 112 ops: []operation{ 113 {kind: writeOp, buf: []byte("lorem")}, 114 {kind: readOp, buf: []byte("lorem"), readAt: 0}, 115 {kind: writeOp, buf: []byte("ipsum")}, 116 {kind: readOp, buf: []byte("ipsum"), readAt: 5}, 117 {kind: writeOp, buf: []byte("dolor")}, 118 {kind: readOp, buf: []byte("dolor"), readAt: 10}, 119 {kind: writeOp, buf: []byte("sit")}, // triggers a flush 120 {kind: readOp, buf: []byte("sit"), readAt: 15}, 121 {kind: readOp, buf: []byte("loremipsumdolorsit"), readAt: 0}, 122 {kind: writeOp, buf: []byte("amet")}, 123 {kind: readOp, buf: []byte("amet"), readAt: 18}, 124 {kind: readOp, buf: []byte("loremipsumdolorsitamet"), readAt: 0}, 125 }, 126 }, 127 { 128 name: "flush empty buffer", 129 size: 16, 130 ops: []operation{ 131 {kind: writeOp, buf: []byte("loremipsum")}, 132 {kind: flushOp}, 133 }, 134 }, 135 { 136 name: "double flush write", 137 size: 16, 138 ops: []operation{ 139 {kind: writeOp, buf: []byte("loremipsum")}, 140 {kind: flushOp}, 141 {kind: writeOp, buf: []byte("dolor")}, 142 {kind: flushOp}, 143 {kind: flushOp}, 144 }, 145 }, 146 } 147 for _, test := range tests { 148 t.Run(test.name, func(t *testing.T) { 149 path := newTestFilePath(t) 150 j := newTestJournalWriter(t, path) 151 // set specific buffer size 152 j.buf = make([]byte, 0, test.size) 153 154 var off int64 155 var err error 156 for i, op := range test.ops { 157 switch op.kind { 158 case readOp: 159 act := make([]byte, len(op.buf)) 160 n, err := j.readAt(act, op.readAt) 161 assert.NoError(t, err, "operation %d errored", i) 162 assert.Equal(t, len(op.buf), n, "operation %d failed", i) 163 assert.Equal(t, op.buf, act, "operation %d failed", i) 164 case writeOp: 165 var p []byte 166 p, err = j.getBytes(len(op.buf)) 167 require.NoError(t, err, "operation %d errored", i) 168 n := copy(p, op.buf) 169 assert.Equal(t, len(op.buf), n, "operation %d failed", i) 170 off += int64(n) 171 case flushOp: 172 err = j.flush() 173 assert.NoError(t, err, "operation %d errored", i) 174 default: 175 t.Fatal("unknown opKind") 176 } 177 assert.Equal(t, off, j.offset()) 178 } 179 }) 180 } 181 } 182 183 func newTestJournalWriter(t *testing.T, path string) *journalWriter { 184 ctx := context.Background() 185 j, err := createJournalWriter(ctx, path) 186 require.NoError(t, err) 187 require.NotNil(t, j) 188 _, err = j.bootstrapJournal(ctx, nil) 189 require.NoError(t, err) 190 return j 191 } 192 193 func TestJournalWriterWriteCompressedChunk(t *testing.T) { 194 path := newTestFilePath(t) 195 j := newTestJournalWriter(t, path) 196 data := randomCompressedChunks(1024) 197 for a, cc := range data { 198 err := j.writeCompressedChunk(cc) 199 require.NoError(t, err) 200 r, _ := j.ranges.get(a) 201 validateLookup(t, j, r, cc) 202 } 203 validateAllLookups(t, j, data) 204 } 205 206 func TestJournalWriterBootstrap(t *testing.T) { 207 ctx := context.Background() 208 path := newTestFilePath(t) 209 j := newTestJournalWriter(t, path) 210 data := randomCompressedChunks(1024) 211 var last hash.Hash 212 for _, cc := range data { 213 err := j.writeCompressedChunk(cc) 214 require.NoError(t, err) 215 last = cc.Hash() 216 } 217 require.NoError(t, j.commitRootHash(last)) 218 require.NoError(t, j.Close()) 219 220 j, _, err := openJournalWriter(ctx, path) 221 require.NoError(t, err) 222 reflogBuffer := newReflogRingBuffer(10) 223 last, err = j.bootstrapJournal(ctx, reflogBuffer) 224 require.NoError(t, err) 225 assertExpectedIterationOrder(t, reflogBuffer, []string{last.String()}) 226 227 validateAllLookups(t, j, data) 228 229 source := journalChunkSource{journal: j} 230 for a, cc := range data { 231 buf, err := source.get(ctx, a, nil) 232 require.NoError(t, err) 233 ch, err := cc.ToChunk() 234 require.NoError(t, err) 235 assert.Equal(t, ch.Data(), buf) 236 } 237 } 238 239 func validateAllLookups(t *testing.T, j *journalWriter, data map[hash.Hash]CompressedChunk) { 240 // move |data| to addr16-keyed map 241 prefixMap := make(map[addr16]CompressedChunk, len(data)) 242 var prefix addr16 243 for a, cc := range data { 244 copy(prefix[:], a[:]) 245 prefixMap[prefix] = cc 246 } 247 iterRangeIndex(j.ranges, func(a addr16, r Range) (stop bool) { 248 validateLookup(t, j, r, prefixMap[a]) 249 return 250 }) 251 } 252 253 func iterRangeIndex(idx rangeIndex, cb func(addr16, Range) (stop bool)) { 254 idx.novel.Iter(func(a hash.Hash, r Range) (stop bool) { 255 return cb(toAddr16(a), r) 256 }) 257 idx.cached.Iter(cb) 258 } 259 260 func validateLookup(t *testing.T, j *journalWriter, r Range, cc CompressedChunk) { 261 buf := make([]byte, r.Length) 262 _, err := j.readAt(buf, int64(r.Offset)) 263 require.NoError(t, err) 264 act, err := NewCompressedChunk(cc.H, buf) 265 assert.NoError(t, err) 266 assert.Equal(t, cc.FullCompressedChunk, act.FullCompressedChunk) 267 } 268 269 func TestJournalWriterSyncClose(t *testing.T) { 270 path := newTestFilePath(t) 271 j := newTestJournalWriter(t, path) 272 p := []byte("sit") 273 buf, err := j.getBytes(len(p)) 274 require.NoError(t, err) 275 copy(buf, p) 276 j.flush() 277 assert.Equal(t, 0, len(j.buf)) 278 assert.Equal(t, 3, int(j.off)) 279 } 280 281 func newTestFilePath(t *testing.T) string { 282 path, err := os.MkdirTemp("", "") 283 require.NoError(t, err) 284 return filepath.Join(path, "journal.log") 285 } 286 287 func TestJournalIndexBootstrap(t *testing.T) { 288 // potentially indexed region of a journal 289 type epoch struct { 290 records map[hash.Hash]CompressedChunk 291 last hash.Hash 292 } 293 294 makeEpoch := func() (e epoch) { 295 e.records = randomCompressedChunks(8) 296 for h := range e.records { 297 e.last = hash.Hash(h) 298 break 299 } 300 return 301 } 302 303 tests := []struct { 304 name string 305 epochs []epoch 306 novel epoch 307 }{ 308 { 309 name: "smoke test", 310 epochs: []epoch{makeEpoch()}, 311 }, 312 { 313 name: "non-indexed journal", 314 epochs: nil, 315 novel: makeEpoch(), 316 }, 317 { 318 name: "partially indexed journal", 319 epochs: []epoch{makeEpoch()}, 320 novel: makeEpoch(), 321 }, 322 { 323 name: "multiple index records", 324 epochs: []epoch{ 325 makeEpoch(), 326 makeEpoch(), 327 makeEpoch(), 328 }, 329 novel: makeEpoch(), 330 }, 331 } 332 333 for _, test := range tests { 334 t.Run(test.name, func(t *testing.T) { 335 ctx := context.Background() 336 path := newTestFilePath(t) 337 j := newTestJournalWriter(t, path) 338 // setup 339 var recordCnt int 340 epochs := append(test.epochs, test.novel) 341 for i, e := range epochs { 342 for _, cc := range e.records { 343 recordCnt++ 344 assert.NoError(t, j.writeCompressedChunk(cc)) 345 if rand.Int()%10 == 0 { // periodic commits 346 assert.NoError(t, j.commitRootHash(cc.H)) 347 } 348 } 349 o := j.offset() // precommit offset 350 assert.NoError(t, j.commitRootHash(e.last)) // commit |e.last| 351 if i == len(epochs) { 352 break // don't index |test.novel| 353 } 354 assert.NoError(t, j.flushIndexRecord(e.last, o)) // write index record 355 } 356 err := j.Close() 357 require.NoError(t, err) 358 359 validateJournal := func(p string, expected []epoch) { 360 journal, ok, err := openJournalWriter(ctx, p) 361 require.NoError(t, err) 362 require.True(t, ok) 363 // bootstrap journal and validate chunk records 364 last, err := journal.bootstrapJournal(ctx, nil) 365 assert.NoError(t, err) 366 for _, e := range expected { 367 var act CompressedChunk 368 for a, exp := range e.records { 369 act, err = journal.getCompressedChunk(a) 370 assert.NoError(t, err) 371 assert.Equal(t, exp, act) 372 } 373 } 374 assert.Equal(t, expected[len(expected)-1].last, last) 375 assert.NoError(t, journal.Close()) 376 } 377 378 idxPath := filepath.Join(filepath.Dir(path), journalIndexFileName) 379 380 before, err := os.Stat(idxPath) 381 require.NoError(t, err) 382 383 lookupSize := int64(recordCnt * (1 + lookupSz)) 384 metaSize := int64(len(epochs)) * (1 + lookupMetaSz) 385 assert.Equal(t, lookupSize+metaSize, before.Size()) 386 387 // bootstrap journal using index 388 validateJournal(path, epochs) 389 // assert journal index unchanged 390 info, err := os.Stat(idxPath) 391 require.NoError(t, err) 392 assert.Equal(t, before.Size(), info.Size()) 393 394 // bootstrap journal with corrupted index 395 corruptJournalIndex(t, idxPath) 396 jnl, ok, err := openJournalWriter(ctx, idxPath) 397 require.NoError(t, err) 398 require.True(t, ok) 399 _, err = jnl.bootstrapJournal(ctx, nil) 400 assert.Error(t, err) 401 }) 402 } 403 } 404 405 var encoding = base32.NewEncoding("0123456789abcdefghijklmnopqrstuv") 406 407 // encode returns the base32 encoding in the Dolt alphabet. 408 func encode(data []byte) string { 409 return encoding.EncodeToString(data) 410 } 411 412 func randomCompressedChunks(cnt int) (compressed map[hash.Hash]CompressedChunk) { 413 compressed = make(map[hash.Hash]CompressedChunk) 414 var buf []byte 415 for i := 0; i < cnt; i++ { 416 k := rand.Intn(51) + 50 417 if k >= len(buf) { 418 buf = make([]byte, 64*1024) 419 rand.Read(buf) 420 } 421 c := chunks.NewChunk(buf[:k]) 422 buf = buf[k:] 423 compressed[c.Hash()] = ChunkToCompressedChunk(c) 424 } 425 return 426 } 427 428 func corruptJournalIndex(t *testing.T, path string) { 429 f, err := os.OpenFile(path, os.O_RDWR, 0666) 430 require.NoError(t, err) 431 info, err := f.Stat() 432 require.NoError(t, err) 433 buf := make([]byte, 64) 434 rand.Read(buf) 435 _, err = f.WriteAt(buf, info.Size()/2) 436 require.NoError(t, err) 437 } 438 439 func TestRangeIndex(t *testing.T) { 440 data := randomCompressedChunks(1024) 441 idx := newRangeIndex() 442 for _, c := range data { 443 idx.put(c.Hash(), Range{}) 444 } 445 for _, c := range data { 446 _, ok := idx.get(c.Hash()) 447 assert.True(t, ok) 448 } 449 assert.Equal(t, len(data), idx.novelCount()) 450 assert.Equal(t, len(data), int(idx.count())) 451 idx = idx.flatten() 452 assert.Equal(t, 0, idx.novelCount()) 453 assert.Equal(t, len(data), int(idx.count())) 454 }