github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/nbs/file_manifest.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // This file incorporates work covered by the following copyright and 16 // permission notice: 17 // 18 // Copyright 2016 Attic Labs, Inc. All rights reserved. 19 // Licensed under the Apache License, version 2.0: 20 // http://www.apache.org/licenses/LICENSE-2.0 21 22 package nbs 23 24 import ( 25 "context" 26 "errors" 27 "fmt" 28 "io" 29 "os" 30 "path/filepath" 31 "strings" 32 "time" 33 34 "github.com/dolthub/fslock" 35 36 "github.com/dolthub/dolt/go/libraries/utils/file" 37 "github.com/dolthub/dolt/go/store/chunks" 38 "github.com/dolthub/dolt/go/store/hash" 39 "github.com/dolthub/dolt/go/store/util/tempfiles" 40 ) 41 42 const ( 43 manifestFileName = "manifest" 44 lockFileName = "LOCK" 45 lockFileTimeout = time.Millisecond * 100 46 47 storageVersion4 = "4" 48 49 prefixLen = 5 50 ) 51 52 var ErrUnreadableManifest = errors.New("could not read file manifest") 53 54 type manifestChecker func(upstream, contents manifestContents) error 55 56 // ParseManifest parses a manifest file from the supplied reader 57 func ParseManifest(r io.Reader) (ManifestInfo, error) { 58 return parseManifest(r) 59 } 60 61 func MaybeMigrateFileManifest(ctx context.Context, dir string) (bool, error) { 62 _, err := os.Stat(filepath.Join(dir, manifestFileName)) 63 if os.IsNotExist(err) { 64 // no manifest exists, no need to migrate 65 return false, nil 66 } else if err != nil { 67 return false, err 68 } 69 70 _, contents, err := parseIfExists(ctx, dir, nil) 71 if err != nil { 72 return false, err 73 } 74 75 if contents.manifestVers == StorageVersion { 76 // already on v5, no need to migrate 77 return false, nil 78 } 79 80 check := func(_, contents manifestContents) error { 81 if !contents.gcGen.IsEmpty() { 82 return errors.New("migrating from v4 to v5 should result in a manifest with a 0 gcGen") 83 } 84 85 return nil 86 } 87 88 _, err = updateWithChecker(ctx, dir, syncFlush, check, contents.lock, contents, nil) 89 90 if err != nil { 91 return false, err 92 } 93 94 return true, err 95 } 96 97 // getFileManifest makes a new file manifest. 98 func getFileManifest(ctx context.Context, dir string, mode updateMode) (m manifest, err error) { 99 lock := fslock.New(filepath.Join(dir, lockFileName)) 100 m = fileManifest{dir: dir, mode: mode, lock: lock} 101 102 var f *os.File 103 f, err = openIfExists(filepath.Join(dir, manifestFileName)) 104 if err != nil { 105 return nil, err 106 } else if f == nil { 107 return m, nil 108 } 109 defer func() { 110 // keep first error 111 if cerr := f.Close(); err == nil { 112 err = cerr 113 } 114 }() 115 116 var ok bool 117 ok, _, err = m.ParseIfExists(ctx, &Stats{}, nil) 118 if err != nil { 119 return nil, err 120 } else if !ok { 121 return nil, ErrUnreadableManifest 122 } 123 return 124 } 125 126 type updateMode byte 127 128 const ( 129 asyncFlush updateMode = 0 130 syncFlush updateMode = 1 131 ) 132 133 type fileManifest struct { 134 dir string 135 mode updateMode 136 lock *fslock.Lock 137 } 138 139 // Returns nil if path does not exist 140 func openIfExists(path string) (*os.File, error) { 141 f, err := os.Open(path) 142 if os.IsNotExist(err) { 143 return nil, nil 144 } else if err != nil { 145 return nil, err 146 } 147 return f, err 148 } 149 150 func (fm fileManifest) Name() string { 151 return fm.dir 152 } 153 154 // ParseIfExists looks for a LOCK and manifest file in fm.dir. If it finds 155 // them, it takes the lock, parses the manifest and returns its contents, 156 // setting |exists| to true. If not, it sets |exists| to false and returns. In 157 // that case, the other return values are undefined. If |readHook| is non-nil, 158 // it will be executed while ParseIfExists() holds the manifest file lock. 159 // This is to allow for race condition testing. 160 func (fm fileManifest) ParseIfExists( 161 ctx context.Context, 162 stats *Stats, 163 readHook func() error, 164 ) (exists bool, contents manifestContents, err error) { 165 t1 := time.Now() 166 defer func() { stats.ReadManifestLatency.SampleTimeSince(t1) }() 167 168 // no file lock on the read path 169 return parseIfExists(ctx, fm.dir, readHook) 170 } 171 172 func (fm fileManifest) Update(ctx context.Context, lastLock hash.Hash, newContents manifestContents, stats *Stats, writeHook func() error) (mc manifestContents, err error) { 173 t1 := time.Now() 174 defer func() { stats.WriteManifestLatency.SampleTimeSince(t1) }() 175 176 // hold the file lock while we update 177 if err = tryFileLock(fm.lock); err != nil { 178 return manifestContents{}, err 179 } 180 defer func() { 181 if cerr := fm.lock.Unlock(); err == nil { 182 err = cerr // keep first error 183 } 184 }() 185 186 checker := func(upstream, contents manifestContents) error { 187 if contents.gcGen != upstream.gcGen { 188 return chunks.ErrGCGenerationExpired 189 } 190 return nil 191 } 192 193 return updateWithChecker(ctx, fm.dir, fm.mode, checker, lastLock, newContents, writeHook) 194 } 195 196 func (fm fileManifest) UpdateGCGen(ctx context.Context, lastLock hash.Hash, newContents manifestContents, stats *Stats, writeHook func() error) (mc manifestContents, err error) { 197 t1 := time.Now() 198 defer func() { stats.WriteManifestLatency.SampleTimeSince(t1) }() 199 200 // hold the file lock while we update 201 if err = tryFileLock(fm.lock); err != nil { 202 return manifestContents{}, err 203 } 204 defer func() { 205 if cerr := fm.lock.Unlock(); err == nil { 206 err = cerr // keep first error 207 } 208 }() 209 210 checker := func(upstream, contents manifestContents) error { 211 if contents.gcGen == upstream.gcGen { 212 return errors.New("UpdateGCGen() must update the garbage collection generation") 213 } else if contents.root != upstream.root { 214 return errors.New("UpdateGCGen() cannot update the root") 215 } 216 return nil 217 } 218 219 return updateWithChecker(ctx, fm.dir, fm.mode, checker, lastLock, newContents, writeHook) 220 } 221 222 // parseV5Manifest parses the v5 manifest from the Reader given. Assumes the first field (the manifest version and 223 // following : character) have already been consumed by the reader. 224 // 225 // |-- String --|-- String --|-------- String --------|-------- String --------|-------- String -----------------| 226 // | nbs version:Noms version:Base32-encoded lock hash:Base32-encoded root hash:Base32-encoded GC generation hash 227 // 228 // |-- String --|- String --|...|-- String --|- String --| 229 // :table 1 hash:table 1 cnt:...:table N hash:table N cnt| 230 func parseV5Manifest(r io.Reader) (manifestContents, error) { 231 manifest, err := io.ReadAll(r) 232 233 if err != nil { 234 return manifestContents{}, err 235 } 236 237 slices := strings.Split(string(manifest), ":") 238 if len(slices) < prefixLen-1 || len(slices)%2 != 0 { 239 return manifestContents{}, ErrCorruptManifest 240 } 241 242 specs, err := parseSpecs(slices[prefixLen-1:]) 243 if err != nil { 244 return manifestContents{}, err 245 } 246 247 lock, ok := hash.MaybeParse(slices[1]) 248 if !ok { 249 return manifestContents{}, fmt.Errorf("Could not parse lock hash: %s", slices[1]) 250 } 251 252 gcGen, ok := hash.MaybeParse(slices[3]) 253 if !ok { 254 return manifestContents{}, fmt.Errorf("Could not parse GC generation hash: %s", slices[3]) 255 } 256 257 return manifestContents{ 258 manifestVers: StorageVersion, 259 nbfVers: slices[0], 260 lock: lock, 261 root: hash.Parse(slices[2]), 262 gcGen: gcGen, 263 specs: specs, 264 }, nil 265 } 266 267 // parseManifest parses the manifest bytes in the reader given and returns the contents. Consumes the first few bytes 268 func parseManifest(r io.Reader) (manifestContents, error) { 269 var version []byte 270 buf := make([]byte, 1) 271 272 // Parse the manifest up to the : character 273 chars := 0 274 for ; chars < 8; chars++ { 275 _, err := r.Read(buf) 276 if err != nil { 277 return manifestContents{}, err 278 } 279 if buf[0] == ':' { 280 break 281 } 282 version = append(version, buf[0]) 283 } 284 if chars >= 8 { 285 return manifestContents{}, ErrCorruptManifest 286 } 287 288 switch string(version) { 289 case storageVersion4: 290 return parseV4Manifest(r) 291 case StorageVersion: 292 return parseV5Manifest(r) 293 default: 294 return manifestContents{}, fmt.Errorf("Unknown manifest version: %s. You may need to update your client", string(version)) 295 } 296 } 297 298 func writeManifest(temp io.Writer, contents manifestContents) error { 299 strs := make([]string, 2*len(contents.specs)+prefixLen) 300 strs[0], strs[1], strs[2], strs[3], strs[4] = StorageVersion, contents.nbfVers, contents.lock.String(), contents.root.String(), contents.gcGen.String() 301 tableInfo := strs[prefixLen:] 302 formatSpecs(contents.specs, tableInfo) 303 _, err := io.WriteString(temp, strings.Join(strs, ":")) 304 305 return err 306 } 307 308 // parseV4Manifest parses the v4 manifest from the Reader given. Assumes the first field (the manifest version and 309 // following : character) have already been consumed by the reader. 310 // 311 // |-- String --|-- String --|-------- String --------|-------- String --------|-- String --|- String --|...|-- String --|- String --| 312 // | nbs version:Noms version:Base32-encoded lock hash:Base32-encoded root hash:table 1 hash:table 1 cnt:...:table N hash:table N cnt| 313 func parseV4Manifest(r io.Reader) (manifestContents, error) { 314 manifest, err := io.ReadAll(r) 315 316 if err != nil { 317 return manifestContents{}, err 318 } 319 320 slices := strings.Split(string(manifest), ":") 321 if len(slices) < 3 || len(slices)%2 == 0 { 322 return manifestContents{}, ErrCorruptManifest 323 } 324 325 specs, err := parseSpecs(slices[3:]) 326 327 if err != nil { 328 return manifestContents{}, err 329 } 330 331 ad, ok := hash.MaybeParse(slices[1]) 332 if !ok { 333 return manifestContents{}, fmt.Errorf("Could not parse lock hash: %s", slices[1]) 334 } 335 336 return manifestContents{ 337 manifestVers: storageVersion4, 338 nbfVers: slices[0], 339 lock: ad, 340 root: hash.Parse(slices[2]), 341 specs: specs, 342 }, nil 343 } 344 345 // parseIfExists parses the manifest file if it exists, callers must hold the file lock. 346 func parseIfExists(_ context.Context, dir string, readHook func() error) (exists bool, contents manifestContents, err error) { 347 if readHook != nil { 348 if err = readHook(); err != nil { 349 return false, manifestContents{}, err 350 } 351 } 352 353 var f *os.File 354 if f, err = openIfExists(filepath.Join(dir, manifestFileName)); err != nil { 355 return false, manifestContents{}, err 356 } else if f == nil { 357 return false, manifestContents{}, nil 358 } 359 defer func() { 360 if cerr := f.Close(); err == nil { 361 err = cerr // keep first error 362 } 363 }() 364 365 contents, err = parseManifest(f) 366 if err != nil { 367 return false, contents, err 368 } 369 exists = true 370 return 371 } 372 373 // updateWithChecker updates the manifest if |validate| is satisfied, callers must hold the file lock. 374 func updateWithChecker(_ context.Context, dir string, mode updateMode, validate manifestChecker, lastLock hash.Hash, newContents manifestContents, writeHook func() error) (mc manifestContents, err error) { 375 var tempManifestPath string 376 377 // Write a temporary manifest file, to be renamed over manifestFileName upon success. 378 // The closure here ensures this file is closed before moving on. 379 tempManifestPath, err = func() (name string, ferr error) { 380 var temp *os.File 381 temp, ferr = tempfiles.MovableTempFileProvider.NewFile(dir, "nbs_manifest_") 382 if ferr != nil { 383 return "", ferr 384 } 385 386 defer func() { 387 closeErr := temp.Close() 388 389 if ferr == nil { 390 ferr = closeErr 391 } 392 }() 393 394 ferr = writeManifest(temp, newContents) 395 if ferr != nil { 396 return "", ferr 397 } 398 399 if mode == syncFlush { 400 if ferr = temp.Sync(); ferr != nil { 401 return "", ferr 402 } 403 } 404 405 return temp.Name(), nil 406 }() 407 408 if err != nil { 409 return manifestContents{}, err 410 } 411 412 defer file.Remove(tempManifestPath) // If we rename below, this will be a no-op 413 414 // writeHook is for testing, allowing other code to slip in and try to do stuff while we hold the lock. 415 if writeHook != nil { 416 err = writeHook() 417 418 if err != nil { 419 return manifestContents{}, err 420 } 421 } 422 423 var upstream manifestContents 424 // Read current manifest (if it exists). The closure ensures that the file is closed before moving on, so we can rename over it later if need be. 425 manifestPath := filepath.Join(dir, manifestFileName) 426 upstream, err = func() (upstream manifestContents, ferr error) { 427 if f, ferr := openIfExists(manifestPath); ferr == nil && f != nil { 428 defer func() { 429 closeErr := f.Close() 430 431 if ferr == nil { 432 ferr = closeErr 433 } 434 }() 435 436 upstream, ferr = parseManifest(f) 437 438 if ferr != nil { 439 return manifestContents{}, ferr 440 } 441 442 if newContents.nbfVers != upstream.nbfVers { 443 return manifestContents{}, errors.New("Update cannot change manifest version") 444 } 445 446 return upstream, nil 447 } else if ferr != nil { 448 return manifestContents{}, ferr 449 } 450 451 if !lastLock.IsEmpty() { 452 return manifestContents{}, errors.New("new manifest created with non 0 lock") 453 } 454 455 return manifestContents{}, nil 456 }() 457 458 if err != nil { 459 return manifestContents{}, err 460 } 461 462 if lastLock != upstream.lock { 463 return upstream, nil 464 } 465 466 // this is where we assert that gcGen is correct 467 err = validate(upstream, newContents) 468 469 if err != nil { 470 return manifestContents{}, err 471 } 472 473 err = file.Rename(tempManifestPath, manifestPath) 474 if err != nil { 475 return manifestContents{}, err 476 } 477 478 if mode == syncFlush { 479 if err = file.SyncDirectoryHandle(dir); err != nil { 480 return manifestContents{}, err 481 } 482 } 483 484 return newContents, nil 485 } 486 487 func tryFileLock(lock *fslock.Lock) (err error) { 488 err = lock.LockWithTimeout(lockFileTimeout) 489 if errors.Is(err, fslock.ErrTimeout) { 490 err = errors.New("timed out reading database manifest") 491 } 492 return 493 }