github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/metamorphic/test.go

// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package metamorphic

import (
	"context"
	"fmt"
	"io"
	"os"
	"path"
	"sort"
	"strings"

	"github.com/cockroachdb/errors"
	"github.com/cockroachdb/pebble"
	"github.com/cockroachdb/pebble/vfs"
	"github.com/cockroachdb/pebble/vfs/errorfs"
)

type test struct {
	// The list of ops to execute. The ops refer to slots in the batches, iters,
	// and snapshots slices.
	ops       []op
	opsWaitOn [][]int         // op index -> op indexes
	opsDone   []chan struct{} // op index -> done channel
	idx       int
	dir       string
	opts      *pebble.Options
	testOpts  *TestOptions
	writeOpts *pebble.WriteOptions
	tmpDir    string
	// The DBs the test is run on.
	dbs []*pebble.DB
	// The slots for the batches, iterators, and snapshots. These are read and
	// written by the ops to pass state from one op to another.
	batches   []*pebble.Batch
	iters     []*retryableIter
	snapshots []readerCloser
}

func newTest(ops []op) *test {
	return &test{
		ops: ops,
	}
}

func (t *test) init(h *history, dir string, testOpts *TestOptions, numInstances int) error {
	t.dir = dir
	t.testOpts = testOpts
	t.writeOpts = pebble.NoSync
	if testOpts.strictFS {
		t.writeOpts = pebble.Sync
	}
	t.opts = testOpts.Opts.EnsureDefaults()
	t.opts.Logger = h
	lel := pebble.MakeLoggingEventListener(t.opts.Logger)
	t.opts.EventListener = &lel
	t.opts.DebugCheck = func(db *pebble.DB) error {
		// Wrap the ordinary DebugCheckLevels with retrying
		// of injected errors.
		return withRetries(func() error {
			return pebble.DebugCheckLevels(db)
		})
	}
	if numInstances < 1 {
		numInstances = 1
	}

	t.opsWaitOn, t.opsDone = computeSynchronizationPoints(t.ops)

	defer t.opts.Cache.Unref()

	// If an error occurs and we were using an in-memory FS, attempt to clone to
	// on-disk in order to allow post-mortem debugging. Note that always using
	// the on-disk FS isn't desirable because there is a large performance
	// difference between in-memory and on-disk which causes different code paths
	// and timings to be exercised.
	maybeExit := func(err error) {
		if err == nil || errors.Is(err, errorfs.ErrInjected) || errors.Is(err, pebble.ErrCancelledCompaction) {
			return
		}
		t.maybeSaveData()
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	// Exit early on any error from a background operation.
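	// Each handler below funnels any error it observes into maybeExit, which
	// tolerates injected errorfs errors and cancelled compactions but otherwise
	// saves the store (if in-memory) and aborts the run.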
	t.opts.EventListener.BackgroundError = func(err error) {
		t.opts.Logger.Infof("background error: %s", err)
		maybeExit(err)
	}
	t.opts.EventListener.CompactionEnd = func(info pebble.CompactionInfo) {
		t.opts.Logger.Infof("%s", info)
		maybeExit(info.Err)
	}
	t.opts.EventListener.FlushEnd = func(info pebble.FlushInfo) {
		t.opts.Logger.Infof("%s", info)
		if info.Err != nil && !strings.Contains(info.Err.Error(), "pebble: empty table") {
			maybeExit(info.Err)
		}
	}
	t.opts.EventListener.ManifestCreated = func(info pebble.ManifestCreateInfo) {
		t.opts.Logger.Infof("%s", info)
		maybeExit(info.Err)
	}
	t.opts.EventListener.ManifestDeleted = func(info pebble.ManifestDeleteInfo) {
		t.opts.Logger.Infof("%s", info)
		maybeExit(info.Err)
	}
	t.opts.EventListener.TableDeleted = func(info pebble.TableDeleteInfo) {
		t.opts.Logger.Infof("%s", info)
		maybeExit(info.Err)
	}
	t.opts.EventListener.TableIngested = func(info pebble.TableIngestInfo) {
		t.opts.Logger.Infof("%s", info)
		maybeExit(info.Err)
	}
	t.opts.EventListener.WALCreated = func(info pebble.WALCreateInfo) {
		t.opts.Logger.Infof("%s", info)
		maybeExit(info.Err)
	}
	t.opts.EventListener.WALDeleted = func(info pebble.WALDeleteInfo) {
		t.opts.Logger.Infof("%s", info)
		maybeExit(info.Err)
	}

	for i := range t.testOpts.CustomOpts {
		if err := t.testOpts.CustomOpts[i].Open(t.opts); err != nil {
			return err
		}
	}

	t.dbs = make([]*pebble.DB, numInstances)
	for i := range t.dbs {
		var db *pebble.DB
		var err error
		if len(t.dbs) > 1 {
			dir = path.Join(t.dir, fmt.Sprintf("db%d", i+1))
		}
		err = withRetries(func() error {
			db, err = pebble.Open(dir, t.opts)
			return err
		})
		if err != nil {
			return err
		}
		t.dbs[i] = db
		h.log.Printf("// db%d.Open() %v", i+1, err)

		if t.testOpts.sharedStorageEnabled {
			err = withRetries(func() error {
				return db.SetCreatorID(uint64(i + 1))
			})
			if err != nil {
				return err
			}
			h.log.Printf("// db%d.SetCreatorID() %v", i+1, err)
		}
	}

	var err error
	t.tmpDir = t.opts.FS.PathJoin(t.dir, "tmp")
	if err = t.opts.FS.MkdirAll(t.tmpDir, 0755); err != nil {
		return err
	}
	if t.testOpts.strictFS {
		// Sync the whole directory path for the tmpDir, since restartDB() is executed during
		// the test. That would reset MemFS to the synced state, which would make an unsynced
		// directory disappear in the middle of the test. It is the responsibility of the test
		// (not Pebble) to ensure that it can write the ssts that it will subsequently ingest
		// into Pebble.
		for {
			f, err := t.opts.FS.OpenDir(dir)
			if err != nil {
				return err
			}
			if err = f.Sync(); err != nil {
				return err
			}
			if err = f.Close(); err != nil {
				return err
			}
			if len(dir) == 1 {
				break
			}
			dir = t.opts.FS.PathDir(dir)
			// TODO(sbhola): PathDir returns ".", which OpenDir() complains about. Fix.
			if len(dir) == 1 {
				dir = "/"
			}
		}
	}

	return nil
}

func (t *test) isFMV(dbID objID, fmv pebble.FormatMajorVersion) bool {
	db := t.getDB(dbID)
	return db.FormatMajorVersion() >= fmv
}

func (t *test) restartDB(dbID objID) error {
	db := t.getDB(dbID)
	if !t.testOpts.strictFS {
		return nil
	}
	t.opts.Cache.Ref()
	// The fs isn't necessarily a MemFS.
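	// Only a MemFS can simulate a crash-restart by discarding unsynced state, so
	// the ignore-syncs/reset-to-synced-state sequence below is skipped for other
	// filesystems.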
	fs, ok := vfs.Root(t.opts.FS).(*vfs.MemFS)
	if ok {
		fs.SetIgnoreSyncs(true)
	}
	if err := db.Close(); err != nil {
		return err
	}
	// Release any resources held by custom options. This may be used, for
	// example, by the encryption-at-rest custom option (within the Cockroach
	// repository) to close the file registry.
	for i := range t.testOpts.CustomOpts {
		if err := t.testOpts.CustomOpts[i].Close(t.opts); err != nil {
			return err
		}
	}
	if ok {
		fs.ResetToSyncedState()
		fs.SetIgnoreSyncs(false)
	}

	// TODO(jackson): Audit errorRate and ensure custom options' hooks semantics
	// are well defined within the context of retries.
	err := withRetries(func() (err error) {
		// Reacquire any resources required by custom options. This may be used, for
		// example, by the encryption-at-rest custom option (within the Cockroach
		// repository) to reopen the file registry.
		for i := range t.testOpts.CustomOpts {
			if err := t.testOpts.CustomOpts[i].Open(t.opts); err != nil {
				return err
			}
		}
		dir := t.dir
		if len(t.dbs) > 1 {
			dir = path.Join(dir, fmt.Sprintf("db%d", dbID.slot()))
		}
		t.dbs[dbID.slot()-1], err = pebble.Open(dir, t.opts)
		if err != nil {
			return err
		}
		return err
	})
	t.opts.Cache.Unref()
	return err
}

func (t *test) maybeSaveDataInternal() error {
	rootFS := vfs.Root(t.opts.FS)
	if rootFS == vfs.Default {
		return nil
	}
	if err := os.RemoveAll(t.dir); err != nil {
		return err
	}
	if _, err := vfs.Clone(rootFS, vfs.Default, t.dir, t.dir); err != nil {
		return err
	}
	if t.testOpts.sharedStorageEnabled {
		fs := t.testOpts.sharedStorageFS
		outputDir := vfs.Default.PathJoin(t.dir, "shared", string(t.testOpts.Opts.Experimental.CreateOnSharedLocator))
		vfs.Default.MkdirAll(outputDir, 0755)
		objs, err := fs.List("", "")
		if err != nil {
			return err
		}
		for i := range objs {
			reader, readSize, err := fs.ReadObject(context.TODO(), objs[i])
			if err != nil {
				return err
			}
			buf := make([]byte, readSize)
			if err := reader.ReadAt(context.TODO(), buf, 0); err != nil {
				return err
			}
			outputPath := vfs.Default.PathJoin(outputDir, objs[i])
			outputFile, err := vfs.Default.Create(outputPath)
			if err != nil {
				return err
			}
			if _, err := outputFile.Write(buf); err != nil {
				outputFile.Close()
				return err
			}
			if err := outputFile.Close(); err != nil {
				return err
			}
		}
	}
	return nil
}

// If an in-memory FS is being used, save the contents to disk.
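// Errors encountered while copying are logged rather than returned, since this
// is a best-effort dump of the store for post-mortem debugging.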
func (t *test) maybeSaveData() {
	if err := t.maybeSaveDataInternal(); err != nil {
		t.opts.Logger.Infof("unable to save data: %s: %v", t.dir, err)
	}
}

func (t *test) step(h *history) bool {
	if t.idx >= len(t.ops) {
		return false
	}
	t.ops[t.idx].run(t, h.recorder(-1 /* thread */, t.idx))
	t.idx++
	return true
}

func (t *test) setBatch(id objID, b *pebble.Batch) {
	if id.tag() != batchTag {
		panic(fmt.Sprintf("invalid batch ID: %s", id))
	}
	t.batches[id.slot()] = b
}

func (t *test) setIter(id objID, i *pebble.Iterator) {
	if id.tag() != iterTag {
		panic(fmt.Sprintf("invalid iter ID: %s", id))
	}
	t.iters[id.slot()] = &retryableIter{
		iter:    i,
		lastKey: nil,
	}
}

type readerCloser interface {
	pebble.Reader
	io.Closer
}

func (t *test) setSnapshot(id objID, s readerCloser) {
	if id.tag() != snapTag {
		panic(fmt.Sprintf("invalid snapshot ID: %s", id))
	}
	t.snapshots[id.slot()] = s
}

func (t *test) clearObj(id objID) {
	switch id.tag() {
	case dbTag:
		t.dbs[id.slot()-1] = nil
	case batchTag:
		t.batches[id.slot()] = nil
	case iterTag:
		t.iters[id.slot()] = nil
	case snapTag:
		t.snapshots[id.slot()] = nil
	}
}

func (t *test) getBatch(id objID) *pebble.Batch {
	if id.tag() != batchTag {
		panic(fmt.Sprintf("invalid batch ID: %s", id))
	}
	return t.batches[id.slot()]
}

func (t *test) getCloser(id objID) io.Closer {
	switch id.tag() {
	case dbTag:
		return t.dbs[id.slot()-1]
	case batchTag:
		return t.batches[id.slot()]
	case iterTag:
		return t.iters[id.slot()]
	case snapTag:
		return t.snapshots[id.slot()]
	}
	panic(fmt.Sprintf("cannot close ID: %s", id))
}

func (t *test) getIter(id objID) *retryableIter {
	if id.tag() != iterTag {
		panic(fmt.Sprintf("invalid iter ID: %s", id))
	}
	return t.iters[id.slot()]
}

func (t *test) getReader(id objID) pebble.Reader {
	switch id.tag() {
	case dbTag:
		return t.dbs[id.slot()-1]
	case batchTag:
		return t.batches[id.slot()]
	case snapTag:
		return t.snapshots[id.slot()]
	}
	panic(fmt.Sprintf("invalid reader ID: %s", id))
}

func (t *test) getWriter(id objID) pebble.Writer {
	switch id.tag() {
	case dbTag:
		return t.dbs[id.slot()-1]
	case batchTag:
		return t.batches[id.slot()]
	}
	panic(fmt.Sprintf("invalid writer ID: %s", id))
}

func (t *test) getDB(id objID) *pebble.DB {
	switch id.tag() {
	case dbTag:
		return t.dbs[id.slot()-1]
	default:
		panic(fmt.Sprintf("invalid DB tag: %v", id.tag()))
	}
}

// Compute the synchronization points between operations. When operating
// with more than 1 thread, operations must synchronize access to shared
// objects. Compute two slices the same length as ops.
//
// opsWaitOn: each value v at index i indicates that operation i must wait
// for the operation at index v to finish before it may run. NB: v < i.
//
// opsDone: the channel at index i must be closed when the operation at index i
// completes. This slice is sparse. Operations that are never used as
// synchronization points may have a nil channel.
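//
// As an illustration (a hypothetical sequence, chosen only to show the shape of
// the output): suppose ops 0 and 1 both have receiver db1, op 1 additionally
// syncs on batch1, and ops 2 and 3 have receiver batch1 with op 3 additionally
// syncing on db1. Then opsWaitOn is [[], [], [1], [1]]: op 2 waits on op 1
// because the last reference to batch1 came from an op with a different
// receiver, and op 3 waits on op 1 because op 1 was the last op to reference
// db1. opsDone holds a channel only at index 1, the only op waited on.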
func computeSynchronizationPoints(ops []op) (opsWaitOn [][]int, opsDone []chan struct{}) {
	opsDone = make([]chan struct{}, len(ops)) // operation index -> done channel
	opsWaitOn = make([][]int, len(ops))       // operation index -> operation index
	lastOpReference := make(map[objID]int)    // objID -> operation index
	for i, o := range ops {
		// Find the last operation that involved the same receiver object. We at
		// least need to wait on that operation.
		receiver := o.receiver()
		waitIndex, ok := lastOpReference[receiver]
		lastOpReference[receiver] = i
		if !ok {
			// Only valid for i=0. For all other operations, the receiver should
			// have been referenced by some other operation before it's used as
			// a receiver.
			if i != 0 && receiver.tag() != dbTag {
				panic(fmt.Sprintf("op %s on receiver %s; first reference of %s", ops[i].String(), receiver, receiver))
			}
			// The initOp is a little special. We do want to store the objects it's
			// syncing on, in `lastOpReference`.
			if i != 0 {
				continue
			}
		}

		// The last operation that referenced `receiver` is the one at index
		// `waitIndex`. All operations with the same receiver are performed on
		// the same thread. We only need to synchronize on the operation at
		// `waitIndex` if `receiver` wasn't also the receiver of that operation.
		if ops[waitIndex].receiver() != receiver {
			opsWaitOn[i] = append(opsWaitOn[i], waitIndex)
		}

		// In addition to synchronizing on the last operation involving the
		// receiver, we may need to synchronize on additional objects. For
		// example, batch0.Commit() must synchronize on its receiver, batch0, but
		// also on the DB since it mutates database state.
		for _, syncObjID := range o.syncObjs() {
			if vi, vok := lastOpReference[syncObjID]; vok {
				opsWaitOn[i] = append(opsWaitOn[i], vi)
			}
			lastOpReference[syncObjID] = i
		}

		waitIndexes := opsWaitOn[i]
		sort.Ints(waitIndexes)
		for _, waitIndex := range waitIndexes {
			// If this is the first operation that must wait on the operation at
			// `waitIndex`, then there will be no channel for the operation yet.
			// Create one.
			if opsDone[waitIndex] == nil {
				opsDone[waitIndex] = make(chan struct{})
			}
		}
	}
	return opsWaitOn, opsDone
}