github.com/keybase/client/go@v0.0.0-20240309051027-028f7c731f8b/kbfs/simplefs/archive.go

// Copyright 2024 Keybase, Inc. All rights reserved. Use of
// this source code is governed by the included BSD license.

package simplefs

import (
	"archive/zip"
	"bytes"
	"compress/gzip"
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"hash"
	"io"
	"io/fs"
	"os"
	"path/filepath"
	"sort"
	"sync"
	"time"

	"github.com/keybase/client/go/protocol/keybase1"
	"github.com/pkg/errors"
	"golang.org/x/net/context"
	"gopkg.in/src-d/go-billy.v4"
)

func loadArchiveStateFromJsonGz(ctx context.Context, simpleFS *SimpleFS, filePath string) (state *keybase1.SimpleFSArchiveState, err error) {
	f, err := os.Open(filePath)
	if err != nil {
		simpleFS.log.CErrorf(ctx, "loadArchiveStateFromJsonGz: opening state file error: %v", err)
		return nil, err
	}
	defer f.Close()
	gzReader, err := gzip.NewReader(f)
	if err != nil {
		simpleFS.log.CErrorf(ctx, "loadArchiveStateFromJsonGz: creating gzip reader error: %v", err)
		return nil, err
	}
	decoder := json.NewDecoder(gzReader)
	err = decoder.Decode(&state)
	if err != nil {
		simpleFS.log.CErrorf(ctx, "loadArchiveStateFromJsonGz: decoding state file error: %v", err)
		return nil, err
	}
	return state, nil
}

func writeArchiveStateIntoJsonGz(ctx context.Context, simpleFS *SimpleFS, filePath string, s *keybase1.SimpleFSArchiveState) error {
	err := os.MkdirAll(filepath.Dir(filePath), 0755)
	if err != nil {
		simpleFS.log.CErrorf(ctx, "writeArchiveStateIntoJsonGz: os.MkdirAll error: %v", err)
		return err
	}
	f, err := os.Create(filePath)
	if err != nil {
		simpleFS.log.CErrorf(ctx, "writeArchiveStateIntoJsonGz: creating state file error: %v", err)
		return err
	}
	defer f.Close()

	gzWriter := gzip.NewWriter(f)
	defer gzWriter.Close()

	encoder := json.NewEncoder(gzWriter)
	err = encoder.Encode(s)
	if err != nil {
		simpleFS.log.CErrorf(ctx, "writeArchiveStateIntoJsonGz: encoding state file error: %v", err)
		return err
	}

	return nil
}

type errorState struct {
	err       error
	nextRetry time.Time
}

type archiveManager struct {
	simpleFS *SimpleFS

	// Just use a regular mutex rather than a rw one so all writes to
	// persistent storage are synchronized.
	mu               sync.Mutex
	state            *keybase1.SimpleFSArchiveState
	jobCtxCancellers map[string]func()
	// jobID -> errorState. Populated when an error has happened. It's only
	// valid for these phases:
	//
	//   keybase1.SimpleFSArchiveJobPhase_Indexing
	//   keybase1.SimpleFSArchiveJobPhase_Copying
	//   keybase1.SimpleFSArchiveJobPhase_Zipping
	//
	// When nextRetry is reached, errorRetryWorker deletes the errorState
	// from this map and puts the job back into the previous phase so the
	// relevant worker can pick it up again.
	errors map[string]errorState

	indexingWorkerSignal chan struct{}
	copyingWorkerSignal  chan struct{}
	zippingWorkerSignal  chan struct{}

	ctxCancel func()
}
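
// getStateFilePath returns the path of the per-user archive state file
// inside the cache dir, e.g. <cacheDir>/kbfs-archive-<username>.json.gz.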
func getStateFilePath(simpleFS *SimpleFS) string {
	username := simpleFS.config.KbEnv().GetUsername()
	cacheDir := simpleFS.getCacheDir()
	return filepath.Join(cacheDir, fmt.Sprintf("kbfs-archive-%s.json.gz", username))
}

func (m *archiveManager) flushStateFileLocked(ctx context.Context) error {
	select {
	case <-ctx.Done():
		return ctx.Err()
	default:
	}
	err := writeArchiveStateIntoJsonGz(ctx, m.simpleFS, getStateFilePath(m.simpleFS), m.state)
	if err != nil {
		m.simpleFS.log.CErrorf(ctx,
			"archiveManager.flushStateFileLocked: writing state file error: %v", err)
		return err
	}
	return nil
}

func (m *archiveManager) flushStateFile(ctx context.Context) error {
	m.mu.Lock()
	defer m.mu.Unlock()
	return m.flushStateFileLocked(ctx)
}

func (m *archiveManager) signal(ch chan struct{}) {
	select {
	case ch <- struct{}{}:
	default:
		// There's already a signal in the chan. Skipping this.
	}
}

func (m *archiveManager) shutdown(ctx context.Context) {
	// OK to cancel before flushStateFileLocked because we'll pass in the
	// shutdown ctx there.
	if m.ctxCancel != nil {
		m.ctxCancel()
	}

	m.mu.Lock()
	defer m.mu.Unlock()
	err := m.flushStateFileLocked(ctx)
	if err != nil {
		m.simpleFS.log.CWarningf(ctx, "m.flushStateFileLocked error: %v", err)
	}
}

func (m *archiveManager) startJob(ctx context.Context, job keybase1.SimpleFSArchiveJobDesc) error {
	m.simpleFS.log.CDebugf(ctx, "+ archiveManager.startJob %#+v", job)
	defer m.simpleFS.log.CDebugf(ctx, "- archiveManager.startJob")

	m.mu.Lock()
	defer m.mu.Unlock()
	if _, ok := m.state.Jobs[job.JobID]; ok {
		return errors.New("job ID already exists")
	}
	m.state.Jobs[job.JobID] = keybase1.SimpleFSArchiveJobState{
		Desc:  job,
		Phase: keybase1.SimpleFSArchiveJobPhase_Queued,
	}
	m.state.LastUpdated = keybase1.ToTime(time.Now())
	m.signal(m.indexingWorkerSignal)
	return m.flushStateFileLocked(ctx)
}

func (m *archiveManager) cancelOrDismissJob(ctx context.Context,
	jobID string) (err error) {
	m.simpleFS.log.CDebugf(ctx, "+ archiveManager.cancelOrDismissJob")
	defer m.simpleFS.log.CDebugf(ctx, "- archiveManager.cancelOrDismissJob %s", jobID)
	m.mu.Lock()
	defer m.mu.Unlock()

	if cancel, ok := m.jobCtxCancellers[jobID]; ok {
		cancel()
		delete(m.jobCtxCancellers, jobID)
	}

	job, ok := m.state.Jobs[jobID]
	if !ok {
		return errors.New("job not found")
	}
	delete(m.state.Jobs, jobID)

	err = os.RemoveAll(job.Desc.StagingPath)
	if err != nil {
		m.simpleFS.log.CWarningf(ctx, "removing staging path %q for job %s error: %v",
			job.Desc.StagingPath, jobID, err)
	}

	return nil
}

func (m *archiveManager) getCurrentState(ctx context.Context) (
	state keybase1.SimpleFSArchiveState, errorStates map[string]errorState) {
	m.simpleFS.log.CDebugf(ctx, "+ archiveManager.getCurrentState")
	defer m.simpleFS.log.CDebugf(ctx, "- archiveManager.getCurrentState")
	m.mu.Lock()
	defer m.mu.Unlock()
	errorStates = make(map[string]errorState)
	for jobID, errState := range m.errors {
		errorStates[jobID] = errState
	}
	return m.state.DeepCopy(), errorStates
}
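
// changeJobPhaseLocked updates the phase of a single job. It is a no-op
// (with a warning) if the job has disappeared, e.g. because it was
// canceled. Callers must hold m.mu.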
func (m *archiveManager) changeJobPhaseLocked(ctx context.Context,
	jobID string, newPhase keybase1.SimpleFSArchiveJobPhase) {
	copy, ok := m.state.Jobs[jobID]
	if !ok {
		m.simpleFS.log.CWarningf(ctx, "job %s not found. it might have been canceled", jobID)
		return
	}
	copy.Phase = newPhase
	m.state.Jobs[jobID] = copy
}

func (m *archiveManager) changeJobPhase(ctx context.Context,
	jobID string, newPhase keybase1.SimpleFSArchiveJobPhase) {
	m.mu.Lock()
	defer m.mu.Unlock()
	m.changeJobPhaseLocked(ctx, jobID, newPhase)
}

func (m *archiveManager) startWorkerTask(ctx context.Context,
	eligiblePhase keybase1.SimpleFSArchiveJobPhase,
	newPhase keybase1.SimpleFSArchiveJobPhase) (jobID string, jobCtx context.Context, ok bool) {
	jobCtx, cancel := context.WithCancel(ctx)
	m.mu.Lock()
	defer m.mu.Unlock()
	for jobID := range m.state.Jobs {
		if m.state.Jobs[jobID].Phase == eligiblePhase {
			m.changeJobPhaseLocked(ctx, jobID, newPhase)
			m.jobCtxCancellers[jobID] = cancel
			return jobID, jobCtx, true
		}
	}
	// No eligible job; release the context we created so it doesn't leak.
	cancel()
	return "", nil, false
}

const archiveErrorRetryDuration = time.Minute

func (m *archiveManager) setJobError(
	ctx context.Context, jobID string, err error) {
	m.mu.Lock()
	defer m.mu.Unlock()
	nextRetry := time.Now().Add(archiveErrorRetryDuration)
	m.simpleFS.log.CErrorf(ctx, "job %s nextRetry: %s", jobID, nextRetry)
	m.errors[jobID] = errorState{
		err:       err,
		nextRetry: nextRetry,
	}
}

func (m *archiveManager) doIndexing(ctx context.Context, jobID string) (err error) {
	m.simpleFS.log.CDebugf(ctx, "+ doIndexing %s", jobID)
	defer func() { m.simpleFS.log.CDebugf(ctx, "- doIndexing %s err: %v", jobID, err) }()

	jobDesc := func() keybase1.SimpleFSArchiveJobDesc {
		m.mu.Lock()
		defer m.mu.Unlock()
		return m.state.Jobs[jobID].Desc
	}()
	opid, err := m.simpleFS.SimpleFSMakeOpid(ctx)
	if err != nil {
		return err
	}
	defer m.simpleFS.SimpleFSClose(ctx, opid)
	filter := keybase1.ListFilter_NO_FILTER
	err = m.simpleFS.SimpleFSListRecursive(ctx, keybase1.SimpleFSListRecursiveArg{
		OpID:   opid,
		Path:   keybase1.NewPathWithKbfsArchived(jobDesc.KbfsPathWithRevision),
		Filter: filter,
	})
	if err != nil {
		return err
	}
	err = m.simpleFS.SimpleFSWait(ctx, opid)
	if err != nil {
		return err
	}

	listResult, err := m.simpleFS.SimpleFSReadList(ctx, opid)
	if err != nil {
		return err
	}

	var bytesTotal int64
	manifest := make(map[string]keybase1.SimpleFSArchiveFile)
	for _, e := range listResult.Entries {
		manifest[e.Name] = keybase1.SimpleFSArchiveFile{
			State:      keybase1.SimpleFSFileArchiveState_ToDo,
			DirentType: e.DirentType,
		}
		if e.DirentType == keybase1.DirentType_FILE ||
			e.DirentType == keybase1.DirentType_EXEC {
			bytesTotal += int64(e.Size)
		}
	}

	func() {
		m.mu.Lock()
		defer m.mu.Unlock()

		jobCopy, ok := m.state.Jobs[jobID]
		if !ok {
			m.simpleFS.log.CWarningf(ctx, "job %s not found. it might have been canceled", jobID)
			return
		}
		jobCopy.Manifest = manifest
		jobCopy.BytesTotal = bytesTotal
		m.state.Jobs[jobID] = jobCopy
	}()
	return nil
}
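
// indexingWorker picks up Queued jobs one at a time, lists the archived
// KBFS path recursively to build the job's manifest, and then hands the
// job off to the copying worker.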
func (m *archiveManager) indexingWorker(ctx context.Context) {
	for {
		select {
		case <-ctx.Done():
			return
		case <-m.indexingWorkerSignal:
		}

		jobID, jobCtx, ok := m.startWorkerTask(ctx,
			keybase1.SimpleFSArchiveJobPhase_Queued,
			keybase1.SimpleFSArchiveJobPhase_Indexing)

		if !ok {
			continue
		}
		// We got a task. Put another token into the signal channel so we
		// check again on the next iteration.
		m.signal(m.indexingWorkerSignal)

		m.simpleFS.log.CDebugf(ctx, "indexing: %s", jobID)

		err := m.doIndexing(jobCtx, jobID)
		if err == nil {
			m.simpleFS.log.CDebugf(jobCtx, "indexing done on job %s", jobID)
			m.changeJobPhase(jobCtx, jobID, keybase1.SimpleFSArchiveJobPhase_Indexed)
			m.signal(m.copyingWorkerSignal) // Done indexing! Notify the copying worker.
		} else {
			m.simpleFS.log.CErrorf(jobCtx, "indexing error on job %s: %v", jobID, err)
			m.setJobError(ctx, jobID, err)
		}

		err = m.flushStateFile(ctx)
		if err != nil {
			m.simpleFS.log.CWarningf(ctx, "m.flushStateFile error: %v", err)
		}
	}
}

type sha256TeeReader struct {
	inner          io.Reader
	innerTeeReader io.Reader
	h              hash.Hash
}

var _ io.Reader = (*sha256TeeReader)(nil)

// Read implements the io.Reader interface.
func (r *sha256TeeReader) Read(p []byte) (n int, err error) {
	return r.innerTeeReader.Read(p)
}

func (r *sha256TeeReader) getSum() []byte {
	return r.h.Sum(nil)
}

func newSHA256TeeReader(inner io.Reader) (r *sha256TeeReader) {
	r = &sha256TeeReader{
		inner: inner,
		h:     sha256.New(),
	}
	r.innerTeeReader = io.TeeReader(r.inner, r.h)
	return r
}

type bytesUpdaterFunc = func(delta int64)

// ctxAwareCopy copies from one reader to a writer in 64 KiB chunks,
// checking for context cancellation and reporting progress between
// chunks.
func ctxAwareCopy(
	ctx context.Context, to io.Writer, from io.Reader,
	bytesUpdater bytesUpdaterFunc) error {
	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}
		n, err := io.CopyN(to, from, 64*1024)
		switch err {
		case nil:
			bytesUpdater(n)
		case io.EOF:
			bytesUpdater(n)
			return nil
		default:
			return err
		}
	}
}

func (m *archiveManager) copyFileFromBeginning(ctx context.Context,
	srcDirFS billy.Filesystem, entryPathWithinJob string,
	localPath string, mode os.FileMode,
	bytesCopiedUpdater bytesUpdaterFunc) (sha256Sum []byte, err error) {
	m.simpleFS.log.CDebugf(ctx, "+ copyFileFromBeginning %s", entryPathWithinJob)
	defer func() { m.simpleFS.log.CDebugf(ctx, "- copyFileFromBeginning %s err: %v", entryPathWithinJob, err) }()

	src, err := srcDirFS.Open(entryPathWithinJob)
	if err != nil {
		return nil, fmt.Errorf("srcDirFS.Open(%s) error: %v", entryPathWithinJob, err)
	}
	defer src.Close()

	dst, err := os.OpenFile(localPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, mode)
	if err != nil {
		return nil, fmt.Errorf("os.OpenFile(%s) error: %v", localPath, err)
	}
	defer dst.Close()

	teeReader := newSHA256TeeReader(src)

	err = ctxAwareCopy(ctx, dst, teeReader, bytesCopiedUpdater)
	if err != nil {
		return nil, fmt.Errorf("[%s] io.CopyN error: %v", entryPathWithinJob, err)
	}

	// We didn't continue from a previously interrupted copy, so don't
	// bother verifying the sha256sum and just return it.
	return teeReader.getSum(), nil
}
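
// copyFilePickupPrevious resumes an interrupted copy at srcSeekOffset
// (typically the size of the partially written destination file). Since
// the partial data can't be trusted, it re-hashes both source and
// destination afterward and falls back to copyFileFromBeginning on a
// mismatch.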
func (m *archiveManager) copyFilePickupPrevious(ctx context.Context,
	srcDirFS billy.Filesystem, entryPathWithinJob string,
	localPath string, srcSeekOffset int64, mode os.FileMode,
	bytesCopiedUpdater bytesUpdaterFunc) (sha256Sum []byte, err error) {
	m.simpleFS.log.CDebugf(ctx, "+ copyFilePickupPrevious %s", entryPathWithinJob)
	defer func() { m.simpleFS.log.CDebugf(ctx, "- copyFilePickupPrevious %s err: %v", entryPathWithinJob, err) }()

	src, err := srcDirFS.Open(entryPathWithinJob)
	if err != nil {
		return nil, fmt.Errorf("srcDirFS.Open(%s) error: %v", entryPathWithinJob, err)
	}
	defer src.Close()

	_, err = src.Seek(srcSeekOffset, io.SeekStart)
	if err != nil {
		return nil, fmt.Errorf("[%s] src.Seek error: %v", entryPathWithinJob, err)
	}

	// Copy the file.
	if err = func() error {
		dst, err := os.OpenFile(localPath, os.O_APPEND|os.O_WRONLY|os.O_CREATE, mode)
		if err != nil {
			return fmt.Errorf("os.OpenFile(%s) error: %v", localPath, err)
		}
		defer dst.Close()

		err = ctxAwareCopy(ctx, dst, src, bytesCopiedUpdater)
		if err != nil {
			return fmt.Errorf("[%s] io.CopyN error: %v", entryPathWithinJob, err)
		}

		return nil
	}(); err != nil {
		return nil, err
	}

	var size int64
	// Calculate and compare the sha256 of the source and the copied file,
	// since we continued from a previously interrupted copy.
	srcSHA256Sum, dstSHA256Sum, err := func() (srcSHA256Sum, dstSHA256Sum []byte, err error) {
		_, err = src.Seek(0, io.SeekStart)
		if err != nil {
			return nil, nil, fmt.Errorf("[%s] src.Seek error: %v", entryPathWithinJob, err)
		}
		srcSHA256SumHasher := sha256.New()
		size, err = io.Copy(srcSHA256SumHasher, src)
		if err != nil {
			return nil, nil, fmt.Errorf("[%s] io.Copy error: %v", entryPathWithinJob, err)
		}
		srcSHA256Sum = srcSHA256SumHasher.Sum(nil)

		dst, err := os.Open(localPath)
		if err != nil {
			return nil, nil, fmt.Errorf("os.Open(%s) error: %v", localPath, err)
		}
		defer dst.Close()
		dstSHA256SumHasher := sha256.New()
		_, err = io.Copy(dstSHA256SumHasher, dst)
		if err != nil {
			return nil, nil, fmt.Errorf("[%s] io.Copy error: %v", entryPathWithinJob, err)
		}
		dstSHA256Sum = dstSHA256SumHasher.Sum(nil)

		return srcSHA256Sum, dstSHA256Sum, nil
	}()
	if err != nil {
		return nil, err
	}

	if !bytes.Equal(srcSHA256Sum, dstSHA256Sum) {
		m.simpleFS.log.CInfof(ctx,
			"file corruption detected from a previous copy of %s. Will copy from the beginning.",
			entryPathWithinJob)
		bytesCopiedUpdater(-size)
		return m.copyFileFromBeginning(ctx, srcDirFS, entryPathWithinJob, localPath, mode, bytesCopiedUpdater)
	}

	return srcSHA256Sum, nil
}
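
// copyFile copies one file out of the archived KBFS snapshot into the
// local staging area, dispatching to a fresh or resumed copy depending
// on srcSeekOffset, and returns the SHA-256 of the file contents.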
func (m *archiveManager) copyFile(ctx context.Context,
	srcDirFS billy.Filesystem, entryPathWithinJob string,
	localPath string, srcSeekOffset int64, mode os.FileMode,
	bytesCopiedUpdater bytesUpdaterFunc) (sha256Sum []byte, err error) {
	if srcSeekOffset == 0 {
		return m.copyFileFromBeginning(ctx, srcDirFS, entryPathWithinJob, localPath, mode, bytesCopiedUpdater)
	}
	return m.copyFilePickupPrevious(ctx, srcDirFS, entryPathWithinJob, localPath, srcSeekOffset, mode, bytesCopiedUpdater)
}

func getWorkspaceDir(jobDesc keybase1.SimpleFSArchiveJobDesc) string {
	return filepath.Join(jobDesc.StagingPath, "workspace")
}
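
// doCopying walks the job's manifest in sorted path order and
// materializes each entry under the workspace directory: directories are
// created, symlinks are reproduced (or skipped if they escape the source
// tree), and regular files are copied with resume support. Per-entry
// state and the copied byte count are written back to the job as it
// progresses.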
func (m *archiveManager) doCopying(ctx context.Context, jobID string) (err error) {
	m.simpleFS.log.CDebugf(ctx, "+ doCopying %s", jobID)
	defer func() { m.simpleFS.log.CDebugf(ctx, "- doCopying %s err: %v", jobID, err) }()

	desc, manifest := func() (keybase1.SimpleFSArchiveJobDesc, map[string]keybase1.SimpleFSArchiveFile) {
		m.mu.Lock()
		defer m.mu.Unlock()
		manifest := make(map[string]keybase1.SimpleFSArchiveFile)
		for k, v := range m.state.Jobs[jobID].Manifest {
			manifest[k] = v.DeepCopy()
		}
		return m.state.Jobs[jobID].Desc, manifest
	}()

	updateManifest := func(manifest map[string]keybase1.SimpleFSArchiveFile) {
		m.mu.Lock()
		defer m.mu.Unlock()
		// Can override directly since only one worker can work on a given job at a time.
		job := m.state.Jobs[jobID]
		for k, v := range manifest {
			job.Manifest[k] = v.DeepCopy()
		}
		m.state.Jobs[jobID] = job
	}

	updateBytesCopied := func(delta int64) {
		m.mu.Lock()
		defer m.mu.Unlock()
		// Can override directly since only one worker can work on a given job at a time.
		job := m.state.Jobs[jobID]
		job.BytesCopied += delta
		m.state.Jobs[jobID] = job
	}

	srcContainingDirFS, finalElem, err := m.simpleFS.getFSIfExists(ctx,
		keybase1.NewPathWithKbfsArchived(desc.KbfsPathWithRevision))
	if err != nil {
		return fmt.Errorf("getFSIfExists error: %v", err)
	}
	srcDirFS, err := srcContainingDirFS.Chroot(finalElem)
	if err != nil {
		return fmt.Errorf("srcContainingDirFS.Chroot error: %v", err)
	}
	dstBase := filepath.Join(getWorkspaceDir(desc), desc.TargetName)

	entryPaths := make([]string, 0, len(manifest))
	for entryPathWithinJob := range manifest {
		entryPaths = append(entryPaths, entryPathWithinJob)
	}
	sort.Strings(entryPaths)

loopEntryPaths:
	for _, entryPathWithinJob := range entryPaths {
		entry := manifest[entryPathWithinJob]
		entry.State = keybase1.SimpleFSFileArchiveState_InProgress
		manifest[entryPathWithinJob] = entry
		updateManifest(manifest)

		localPath := filepath.Join(dstBase, entryPathWithinJob)
		srcFI, err := srcDirFS.Lstat(entryPathWithinJob)
		if err != nil {
			return fmt.Errorf("srcDirFS.Lstat(%s) error: %v", entryPathWithinJob, err)
		}
		switch {
		case srcFI.IsDir():
			err = os.MkdirAll(localPath, 0755)
			if err != nil {
				return fmt.Errorf("os.MkdirAll(%s) error: %v", localPath, err)
			}
			err = os.Chtimes(localPath, time.Time{}, srcFI.ModTime())
			if err != nil {
				return fmt.Errorf("os.Chtimes(%s) error: %v", localPath, err)
			}
			entry.State = keybase1.SimpleFSFileArchiveState_Complete
			manifest[entryPathWithinJob] = entry
		case srcFI.Mode()&os.ModeSymlink != 0: // symlink
			err = os.MkdirAll(filepath.Dir(localPath), 0755)
			if err != nil {
				return fmt.Errorf("os.MkdirAll(filepath.Dir(%s)) error: %v", localPath, err)
			}
			// Call Stat, which follows symlinks, to make sure the link doesn't
			// escape outside the srcDirFS.
			_, err = srcDirFS.Stat(entryPathWithinJob)
			if err != nil {
				m.simpleFS.log.CWarningf(ctx, "skipping %s due to srcDirFS.Stat error: %v", entryPathWithinJob, err)
				entry.State = keybase1.SimpleFSFileArchiveState_Skipped
				manifest[entryPathWithinJob] = entry
				continue loopEntryPaths
			}

			link, err := srcDirFS.Readlink(entryPathWithinJob)
			if err != nil {
				return fmt.Errorf("srcDirFS.Readlink(%s) error: %v", entryPathWithinJob, err)
			}
			m.simpleFS.log.CInfof(ctx, "calling os.Symlink(%s, %s)", link, localPath)
			err = os.Symlink(link, localPath)
			if err != nil {
				return fmt.Errorf("os.Symlink(%s, %s) error: %v", link, localPath, err)
			}
			// Skipping Chtimes because there doesn't seem to be a way to
			// change time on symlinks.
			entry.State = keybase1.SimpleFSFileArchiveState_Complete
			manifest[entryPathWithinJob] = entry
		default:
			err = os.MkdirAll(filepath.Dir(localPath), 0755)
			if err != nil {
				return fmt.Errorf("os.MkdirAll(filepath.Dir(%s)) error: %v", localPath, err)
			}

			var mode os.FileMode = 0644
			if srcFI.Mode()&0100 != 0 {
				mode = 0755
			}

			seek := int64(0)

			dstFI, err := os.Lstat(localPath)
			switch {
			case os.IsNotExist(err): // simple copy from the start of file
			case err == nil: // continue from a previously interrupted copy
				if srcFI.Mode()&os.ModeSymlink == 0 {
					seek = dstFI.Size()
				}
				// otherwise copy from the start of file
			default:
				return fmt.Errorf("os.Lstat(%s) error: %v", localPath, err)
			}

			sha256Sum, err := m.copyFile(ctx,
				srcDirFS, entryPathWithinJob, localPath, seek, mode, updateBytesCopied)
			if err != nil {
				return err
			}

			err = os.Chtimes(localPath, time.Time{}, srcFI.ModTime())
			if err != nil {
				return fmt.Errorf("os.Chtimes(%s) error: %v", localPath, err)
			}

			entry.Sha256SumHex = hex.EncodeToString(sha256Sum)
			entry.State = keybase1.SimpleFSFileArchiveState_Complete
			manifest[entryPathWithinJob] = entry
		}
		updateManifest(manifest)
	}

	return nil
}
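
// copyingWorker picks up Indexed jobs one at a time, copies their
// contents into the local workspace, and then hands each job off to the
// zipping worker.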
func (m *archiveManager) copyingWorker(ctx context.Context) {
	for {
		select {
		case <-ctx.Done():
			return
		case <-m.copyingWorkerSignal:
		}

		jobID, jobCtx, ok := m.startWorkerTask(ctx,
			keybase1.SimpleFSArchiveJobPhase_Indexed,
			keybase1.SimpleFSArchiveJobPhase_Copying)

		if !ok {
			continue
		}
		// We got a task. Put another token into the signal channel so we
		// check again on the next iteration.
		m.signal(m.copyingWorkerSignal)

		m.simpleFS.log.CDebugf(ctx, "copying: %s", jobID)

		err := m.doCopying(jobCtx, jobID)
		if err == nil {
			m.simpleFS.log.CDebugf(jobCtx, "copying done on job %s", jobID)
			m.changeJobPhase(jobCtx, jobID, keybase1.SimpleFSArchiveJobPhase_Copied)
			m.signal(m.zippingWorkerSignal) // Done copying! Notify the zipping worker.
		} else {
			m.simpleFS.log.CErrorf(jobCtx, "copying error on job %s: %v", jobID, err)
			m.setJobError(ctx, jobID, err)
		}

		err = m.flushStateFile(ctx)
		if err != nil {
			m.simpleFS.log.CWarningf(ctx, "m.flushStateFile error: %v", err)
		}
	}
}

// zipWriterAddDir is adapted from zip.Writer.AddFS in the go1.22.0 source
// because 1) we're not on a Go version with that function yet; 2) Go's
// AddFS doesn't support symlinks; and 3) we need bytesZippedUpdater here,
// which requires copying with CopyN.
func zipWriterAddDir(ctx context.Context,
	w *zip.Writer, dirPath string, bytesZippedUpdater bytesUpdaterFunc) error {
	fsys := os.DirFS(dirPath)
	return fs.WalkDir(fsys, ".", func(name string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if d.IsDir() {
			return nil
		}
		info, err := d.Info()
		if err != nil {
			return err
		}
		if !(info.Mode() &^ fs.ModeSymlink).IsRegular() {
			return errors.New("zip: cannot add non-regular file except symlink")
		}
		h, err := zip.FileInfoHeader(info)
		if err != nil {
			return err
		}
		h.Name = name
		h.Method = zip.Deflate
		fw, err := w.CreateHeader(h)
		if err != nil {
			return err
		}
		switch {
		case info.Mode()&fs.ModeSymlink != 0:
			// For symlinks, store the link target as the entry's content.
			target, err := os.Readlink(filepath.Join(dirPath, name))
			if err != nil {
				return err
			}
			_, err = fw.Write([]byte(filepath.ToSlash(target)))
			if err != nil {
				return err
			}
			return nil
		default:
			f, err := fsys.Open(name)
			if err != nil {
				return err
			}
			defer f.Close()
			return ctxAwareCopy(ctx, fw, f, bytesZippedUpdater)
		}
	})
}
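
// doZipping packages the job's workspace directory into the zip file at
// the job's ZipFilePath, tracking BytesZipped as it goes, and removes
// the workspace afterward to free up staging space early.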
func (m *archiveManager) doZipping(ctx context.Context, jobID string) (err error) {
	m.simpleFS.log.CDebugf(ctx, "+ doZipping %s", jobID)
	defer func() { m.simpleFS.log.CDebugf(ctx, "- doZipping %s err: %v", jobID, err) }()

	jobDesc := func() keybase1.SimpleFSArchiveJobDesc {
		m.mu.Lock()
		defer m.mu.Unlock()
		return m.state.Jobs[jobID].Desc
	}()

	// Reset BytesZipped.
	func() {
		m.mu.Lock()
		defer m.mu.Unlock()
		// Can override directly since only one worker can work on a given job at a time.
		job := m.state.Jobs[jobID]
		job.BytesZipped = 0
		m.state.Jobs[jobID] = job
	}()

	updateBytesZipped := func(delta int64) {
		m.mu.Lock()
		defer m.mu.Unlock()
		// Can override directly since only one worker can work on a given job at a time.
		job := m.state.Jobs[jobID]
		job.BytesZipped += delta
		m.state.Jobs[jobID] = job
	}

	workspaceDir := getWorkspaceDir(jobDesc)

	err = func() (err error) {
		mode := os.O_WRONLY | os.O_CREATE | os.O_EXCL
		if jobDesc.OverwriteZip {
			mode = os.O_WRONLY | os.O_CREATE | os.O_TRUNC
		}
		zipFile, err := os.OpenFile(jobDesc.ZipFilePath, mode, 0666)
		if err != nil {
			return fmt.Errorf("os.OpenFile(%s) error: %v", jobDesc.ZipFilePath, err)
		}
		defer func() {
			closeErr := zipFile.Close()
			if err == nil {
				err = closeErr
			}
		}()

		zipWriter := zip.NewWriter(zipFile)
		defer func() {
			closeErr := zipWriter.Close()
			if err == nil {
				err = closeErr
			}
		}()

		err = zipWriterAddDir(ctx, zipWriter, workspaceDir, updateBytesZipped)
		if err != nil {
			return fmt.Errorf("zipWriterAddDir to %s error: %v", jobDesc.ZipFilePath, err)
		}

		return nil
	}()
	if err != nil {
		return err
	}

	// Remove the workspace so we release the storage space early, before
	// the user dismisses the job.
	err = os.RemoveAll(workspaceDir)
	if err != nil {
		m.simpleFS.log.CWarningf(ctx, "removing workspace %s error %v", workspaceDir, err)
	}

	return nil
}

func (m *archiveManager) zippingWorker(ctx context.Context) {
	for {
		select {
		case <-ctx.Done():
			return
		case <-m.zippingWorkerSignal:
		}

		jobID, jobCtx, ok := m.startWorkerTask(ctx,
			keybase1.SimpleFSArchiveJobPhase_Copied,
			keybase1.SimpleFSArchiveJobPhase_Zipping)

		if !ok {
			continue
		}
		// We got a task. Put another token into the signal channel so we
		// check again on the next iteration.
		m.signal(m.zippingWorkerSignal)

		m.simpleFS.log.CDebugf(ctx, "zipping: %s", jobID)

		err := m.doZipping(jobCtx, jobID)
		if err == nil {
			m.simpleFS.log.CDebugf(jobCtx, "zipping done on job %s", jobID)
			m.changeJobPhase(jobCtx, jobID, keybase1.SimpleFSArchiveJobPhase_Done)
		} else {
			m.simpleFS.log.CErrorf(jobCtx, "zipping error on job %s: %v", jobID, err)
			m.setJobError(ctx, jobID, err)
		}

		err = m.flushStateFile(ctx)
		if err != nil {
			m.simpleFS.log.CWarningf(ctx, "m.flushStateFile error: %v", err)
		}
	}
}

// resetInterruptedPhaseLocked moves a job that was interrupted mid-phase
// back to the previous stable phase so the relevant worker can pick it
// up again. Callers must hold m.mu.
func (m *archiveManager) resetInterruptedPhaseLocked(ctx context.Context, jobID string) (changed bool) {
	switch m.state.Jobs[jobID].Phase {
	case keybase1.SimpleFSArchiveJobPhase_Indexing:
		m.simpleFS.log.CDebugf(ctx, "resetting %s phase from %s to %s", jobID,
			keybase1.SimpleFSArchiveJobPhase_Indexing,
			keybase1.SimpleFSArchiveJobPhase_Queued)
		m.changeJobPhaseLocked(ctx, jobID,
			keybase1.SimpleFSArchiveJobPhase_Queued)
		return true
	case keybase1.SimpleFSArchiveJobPhase_Copying:
		m.simpleFS.log.CDebugf(ctx, "resetting %s phase from %s to %s", jobID,
			keybase1.SimpleFSArchiveJobPhase_Copying,
			keybase1.SimpleFSArchiveJobPhase_Indexed)
		m.changeJobPhaseLocked(ctx, jobID,
			keybase1.SimpleFSArchiveJobPhase_Indexed)
		return true
	case keybase1.SimpleFSArchiveJobPhase_Zipping:
		m.simpleFS.log.CDebugf(ctx, "resetting %s phase from %s to %s", jobID,
			keybase1.SimpleFSArchiveJobPhase_Zipping,
			keybase1.SimpleFSArchiveJobPhase_Copied)
		m.changeJobPhaseLocked(ctx, jobID,
			keybase1.SimpleFSArchiveJobPhase_Copied)
		return true
	default:
		m.simpleFS.log.CDebugf(ctx, "not resetting %s phase from %s", jobID,
			m.state.Jobs[jobID].Phase)
		return false
	}
}

func (m *archiveManager) errorRetryWorker(ctx context.Context) {
	ticker := time.NewTicker(time.Second * 5)
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
		}

		func() {
			m.mu.Lock()
			defer m.mu.Unlock()
			jobIDs := make([]string, 0, len(m.state.Jobs))
			for jobID := range m.state.Jobs {
				jobIDs = append(jobIDs, jobID)
			}
		loopJobIDs:
			for _, jobID := range jobIDs {
				errState, ok := m.errors[jobID]
				if !ok {
					continue loopJobIDs
				}
				if time.Now().Before(errState.nextRetry) {
					continue loopJobIDs
				}
				m.simpleFS.log.CDebugf(ctx, "retrying job %s", jobID)
				changed := m.resetInterruptedPhaseLocked(ctx, jobID)
				if !changed {
					m.simpleFS.log.CWarningf(ctx,
						"job %s has an error state %v but an unexpected job phase",
						jobID, errState.err)
					continue loopJobIDs
				}
				delete(m.errors, jobID)

				m.signal(m.indexingWorkerSignal)
				m.signal(m.copyingWorkerSignal)
				m.signal(m.zippingWorkerSignal)
			}
		}()
	}
}
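
// start launches the worker goroutines and primes every signal channel
// once so that any jobs restored from the state file make progress
// immediately.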
func (m *archiveManager) start() {
	ctx := context.Background()
	ctx, m.ctxCancel = context.WithCancel(ctx)
	go m.indexingWorker(m.simpleFS.makeContext(ctx))
	go m.copyingWorker(m.simpleFS.makeContext(ctx))
	go m.zippingWorker(m.simpleFS.makeContext(ctx))
	go m.errorRetryWorker(m.simpleFS.makeContext(ctx))
	m.signal(m.indexingWorkerSignal)
	m.signal(m.copyingWorkerSignal)
	m.signal(m.zippingWorkerSignal)
}

func (m *archiveManager) resetInterruptedPhasesLocked(ctx context.Context) {
	// We don't resume indexing and zipping work, so just reset those
	// phases here. Copying is resumable, but since we track per-file
	// state we reset its phase here as well.
	for jobID := range m.state.Jobs {
		_ = m.resetInterruptedPhaseLocked(ctx, jobID)
	}
}

func newArchiveManager(simpleFS *SimpleFS) (m *archiveManager, err error) {
	ctx := context.Background()
	simpleFS.log.CDebugf(ctx, "+ newArchiveManager")
	defer simpleFS.log.CDebugf(ctx, "- newArchiveManager")
	m = &archiveManager{
		simpleFS:             simpleFS,
		jobCtxCancellers:     make(map[string]func()),
		errors:               make(map[string]errorState),
		indexingWorkerSignal: make(chan struct{}, 1),
		copyingWorkerSignal:  make(chan struct{}, 1),
		zippingWorkerSignal:  make(chan struct{}, 1),
	}
	stateFilePath := getStateFilePath(simpleFS)
	m.state, err = loadArchiveStateFromJsonGz(ctx, simpleFS, stateFilePath)
	switch err {
	case nil:
		if m.state.Jobs == nil {
			m.state.Jobs = make(map[string]keybase1.SimpleFSArchiveJobState)
		}
		m.resetInterruptedPhasesLocked(ctx)
	default:
		simpleFS.log.CErrorf(ctx, "loadArchiveStateFromJsonGz error (%v). Creating a new state.", err)
		m.state = &keybase1.SimpleFSArchiveState{
			Jobs: make(map[string]keybase1.SimpleFSArchiveJobState),
		}
		err = writeArchiveStateIntoJsonGz(ctx, simpleFS, stateFilePath, m.state)
		if err != nil {
			simpleFS.log.CErrorf(ctx, "newArchiveManager: creating state file error: %v", err)
			return nil, err
		}
	}
	m.start()
	return m, nil
}