github.com/keybase/client/go@v0.0.0-20240424154521-52f30ea26cb1/kbfs/simplefs/archive.go (about) 1 // Copyright 2024 Keybase, Inc. All rights reserved. Use of 2 // this source code is governed by the included BSD license. 3 4 package simplefs 5 6 import ( 7 "archive/zip" 8 "bytes" 9 "compress/gzip" 10 "crypto/sha256" 11 "encoding/hex" 12 "encoding/json" 13 "fmt" 14 "hash" 15 "io" 16 "io/fs" 17 "os" 18 "path/filepath" 19 "sort" 20 "sync" 21 "time" 22 23 "golang.org/x/time/rate" 24 25 "github.com/keybase/client/go/protocol/keybase1" 26 "github.com/pkg/errors" 27 "golang.org/x/net/context" 28 "gopkg.in/src-d/go-billy.v4" 29 ) 30 31 func loadArchiveStateFromJsonGz(ctx context.Context, simpleFS *SimpleFS, filePath string) (state *keybase1.SimpleFSArchiveState, err error) { 32 f, err := os.Open(filePath) 33 if err != nil { 34 simpleFS.log.CErrorf(ctx, "loadArchiveStateFromJsonGz: opening state file error: %v", err) 35 return nil, err 36 } 37 defer f.Close() 38 gzReader, err := gzip.NewReader(f) 39 if err != nil { 40 simpleFS.log.CErrorf(ctx, "loadArchiveStateFromJsonGz: creating gzip reader error: %v", err) 41 return nil, err 42 } 43 decoder := json.NewDecoder(gzReader) 44 err = decoder.Decode(&state) 45 if err != nil { 46 simpleFS.log.CErrorf(ctx, "loadArchiveStateFromJsonGz: decoding state file error: %v", err) 47 return nil, err 48 } 49 return state, nil 50 } 51 52 func writeArchiveStateIntoJsonGz(ctx context.Context, simpleFS *SimpleFS, filePath string, s *keybase1.SimpleFSArchiveState) error { 53 err := os.MkdirAll(filepath.Dir(filePath), 0755) 54 if err != nil { 55 simpleFS.log.CErrorf(ctx, "writeArchiveStateIntoJsonGz: os.MkdirAll error: %v", err) 56 return err 57 } 58 f, err := os.Create(filePath) 59 if err != nil { 60 simpleFS.log.CErrorf(ctx, "writeArchiveStateIntoJsonGz: creating state file error: %v", err) 61 return err 62 } 63 defer f.Close() 64 65 gzWriter := gzip.NewWriter(f) 66 defer gzWriter.Close() 67 68 encoder := json.NewEncoder(gzWriter) 69 err = encoder.Encode(s) 70 if err != nil { 71 simpleFS.log.CErrorf(ctx, "writeArchiveStateIntoJsonGz: encoding state file error: %v", err) 72 return err 73 } 74 75 return nil 76 } 77 78 type errorState struct { 79 err error 80 nextRetry time.Time 81 } 82 83 type archiveManager struct { 84 simpleFS *SimpleFS 85 86 // Just use a regular mutex rather than a rw one so all writes to 87 // persistent storage are synchronized. 88 mu sync.Mutex 89 state *keybase1.SimpleFSArchiveState 90 jobCtxCancellers map[string]func() 91 // jobID -> errorState. Populated when an error has happened. It's only 92 // valid for these phases: 93 // 94 // keybase1.SimpleFSArchiveJobPhase_Indexing 95 // keybase1.SimpleFSArchiveJobPhase_Copying 96 // keybase1.SimpleFSArchiveJobPhase_Zipping 97 // 98 // When nextRetry is current errorRetryWorker delete the errorState from 99 // this map, while also putting them back to the previous phase so the 100 // worker can pick it up. 101 errors map[string]errorState 102 103 indexingWorkerSignal chan struct{} 104 copyingWorkerSignal chan struct{} 105 zippingWorkerSignal chan struct{} 106 notifyUIStateChangeSignal chan struct{} 107 108 ctxCancel func() 109 } 110 111 func getStateFilePath(simpleFS *SimpleFS) string { 112 username := simpleFS.config.KbEnv().GetUsername() 113 cacheDir := simpleFS.getCacheDir() 114 return filepath.Join(cacheDir, fmt.Sprintf("kbfs-archive-%s.json.gz", username)) 115 } 116 117 func (m *archiveManager) flushStateFileLocked(ctx context.Context) error { 118 select { 119 case <-ctx.Done(): 120 return ctx.Err() 121 default: 122 } 123 err := writeArchiveStateIntoJsonGz(ctx, m.simpleFS, getStateFilePath(m.simpleFS), m.state) 124 if err != nil { 125 m.simpleFS.log.CErrorf(ctx, 126 "archiveManager.flushStateFileLocked: writing state file error: %v", err) 127 return err 128 } 129 return nil 130 } 131 132 func (m *archiveManager) flushStateFile(ctx context.Context) error { 133 m.mu.Lock() 134 defer m.mu.Unlock() 135 return m.flushStateFileLocked(ctx) 136 } 137 138 func (m *archiveManager) signal(ch chan struct{}) { 139 select { 140 case ch <- struct{}{}: 141 default: 142 // There's already a signal in the chan. Skipping this. 143 } 144 } 145 146 func (m *archiveManager) shutdown(ctx context.Context) { 147 // OK to cancel before flushStateFileLocked because we'll pass in the 148 // shutdown ctx there. 149 if m.ctxCancel != nil { 150 m.ctxCancel() 151 } 152 153 m.mu.Lock() 154 defer m.mu.Unlock() 155 err := m.flushStateFileLocked(ctx) 156 if err != nil { 157 m.simpleFS.log.CWarningf(ctx, "m.flushStateFileLocked error: %v", err) 158 } 159 } 160 161 func (m *archiveManager) notifyUIStateChange(ctx context.Context) { 162 m.simpleFS.log.CDebugf(ctx, "+ archiveManager.notifyUIStateChange") 163 defer m.simpleFS.log.CDebugf(ctx, "- archiveManager.notifyUIStateChange") 164 m.mu.Lock() 165 defer m.mu.Unlock() 166 state, errorStates := m.getCurrentStateLocked(ctx) 167 m.simpleFS.notifyUIStateChange(ctx, state, errorStates) 168 } 169 170 func (m *archiveManager) startJob(ctx context.Context, job keybase1.SimpleFSArchiveJobDesc) error { 171 m.simpleFS.log.CDebugf(ctx, "+ archiveManager.startJob %#+v", job) 172 defer m.simpleFS.log.CDebugf(ctx, "- archiveManager.startJob") 173 174 m.mu.Lock() 175 defer m.mu.Unlock() 176 if _, ok := m.state.Jobs[job.JobID]; ok { 177 return errors.New("job ID already exists") 178 } 179 m.state.Jobs[job.JobID] = keybase1.SimpleFSArchiveJobState{ 180 Desc: job, 181 Phase: keybase1.SimpleFSArchiveJobPhase_Queued, 182 } 183 m.state.LastUpdated = keybase1.ToTime(time.Now()) 184 m.signal(m.notifyUIStateChangeSignal) 185 m.signal(m.indexingWorkerSignal) 186 return m.flushStateFileLocked(ctx) 187 } 188 189 func (m *archiveManager) cancelOrDismissJob(ctx context.Context, 190 jobID string) (err error) { 191 m.simpleFS.log.CDebugf(ctx, "+ archiveManager.cancelOrDismissJob") 192 defer m.simpleFS.log.CDebugf(ctx, "- archiveManager.cancelOrDismissJob %s", jobID) 193 m.mu.Lock() 194 defer m.mu.Unlock() 195 196 if cancel, ok := m.jobCtxCancellers[jobID]; ok { 197 cancel() 198 delete(m.jobCtxCancellers, jobID) 199 } 200 201 job, ok := m.state.Jobs[jobID] 202 if !ok { 203 return errors.New("job not found") 204 } 205 delete(m.state.Jobs, jobID) 206 207 err = os.RemoveAll(job.Desc.StagingPath) 208 if err != nil { 209 m.simpleFS.log.CWarningf(ctx, "removing staging path %q for job %s error: %v", 210 job.Desc.StagingPath, jobID, err) 211 } 212 213 m.signal(m.notifyUIStateChangeSignal) 214 return nil 215 } 216 217 func (m *archiveManager) getCurrentStateLocked(ctx context.Context) ( 218 state keybase1.SimpleFSArchiveState, errorStates map[string]errorState) { 219 errorStates = make(map[string]errorState) 220 for jobID, errState := range m.errors { 221 errorStates[jobID] = errState 222 } 223 return m.state.DeepCopy(), errorStates 224 } 225 226 func (m *archiveManager) getCurrentState(ctx context.Context) ( 227 state keybase1.SimpleFSArchiveState, errorStates map[string]errorState) { 228 m.simpleFS.log.CDebugf(ctx, "+ archiveManager.getCurrentState") 229 defer m.simpleFS.log.CDebugf(ctx, "- archiveManager.getCurrentState") 230 m.mu.Lock() 231 defer m.mu.Unlock() 232 return m.getCurrentStateLocked(ctx) 233 } 234 235 func (m *archiveManager) changeJobPhaseLocked(ctx context.Context, 236 jobID string, newPhase keybase1.SimpleFSArchiveJobPhase) { 237 copy, ok := m.state.Jobs[jobID] 238 if !ok { 239 m.simpleFS.log.CWarningf(ctx, "job %s not found. it might have been canceled", jobID) 240 return 241 } 242 copy.Phase = newPhase 243 m.state.Jobs[jobID] = copy 244 m.signal(m.notifyUIStateChangeSignal) 245 } 246 func (m *archiveManager) changeJobPhase(ctx context.Context, 247 jobID string, newPhase keybase1.SimpleFSArchiveJobPhase) { 248 m.mu.Lock() 249 defer m.mu.Unlock() 250 m.changeJobPhaseLocked(ctx, jobID, newPhase) 251 } 252 253 func (m *archiveManager) startWorkerTask(ctx context.Context, 254 eligiblePhase keybase1.SimpleFSArchiveJobPhase, 255 newPhase keybase1.SimpleFSArchiveJobPhase) (jobID string, jobCtx context.Context, ok bool) { 256 jobCtx, cancel := context.WithCancel(ctx) 257 m.mu.Lock() 258 defer m.mu.Unlock() 259 for jobID := range m.state.Jobs { 260 if m.state.Jobs[jobID].Phase == eligiblePhase { 261 m.changeJobPhaseLocked(ctx, jobID, newPhase) 262 m.jobCtxCancellers[jobID] = cancel 263 return jobID, jobCtx, true 264 } 265 } 266 return "", nil, false 267 } 268 269 const archiveErrorRetryDuration = time.Minute 270 271 func (m *archiveManager) setJobError( 272 ctx context.Context, jobID string, err error) { 273 m.mu.Lock() 274 defer m.mu.Unlock() 275 nextRetry := time.Now().Add(archiveErrorRetryDuration) 276 m.simpleFS.log.CErrorf(ctx, "job %s nextRetry: %s", jobID, nextRetry) 277 m.errors[jobID] = errorState{ 278 err: err, 279 nextRetry: nextRetry, 280 } 281 } 282 283 func (m *archiveManager) doIndexing(ctx context.Context, jobID string) (err error) { 284 m.simpleFS.log.CDebugf(ctx, "+ doIndexing %s", jobID) 285 defer func() { m.simpleFS.log.CDebugf(ctx, "- doIndexing %s err: %v", jobID, err) }() 286 287 jobDesc := func() keybase1.SimpleFSArchiveJobDesc { 288 m.mu.Lock() 289 defer m.mu.Unlock() 290 return m.state.Jobs[jobID].Desc 291 }() 292 opid, err := m.simpleFS.SimpleFSMakeOpid(ctx) 293 if err != nil { 294 return err 295 } 296 defer m.simpleFS.SimpleFSClose(ctx, opid) 297 filter := keybase1.ListFilter_NO_FILTER 298 err = m.simpleFS.SimpleFSListRecursive(ctx, keybase1.SimpleFSListRecursiveArg{ 299 OpID: opid, 300 Path: keybase1.NewPathWithKbfsArchived(jobDesc.KbfsPathWithRevision), 301 Filter: filter, 302 }) 303 err = m.simpleFS.SimpleFSWait(ctx, opid) 304 if err != nil { 305 return err 306 } 307 308 listResult, err := m.simpleFS.SimpleFSReadList(ctx, opid) 309 if err != nil { 310 return err 311 } 312 313 var bytesTotal int64 314 manifest := make(map[string]keybase1.SimpleFSArchiveFile) 315 for _, e := range listResult.Entries { 316 manifest[e.Name] = keybase1.SimpleFSArchiveFile{ 317 State: keybase1.SimpleFSFileArchiveState_ToDo, 318 DirentType: e.DirentType, 319 } 320 if e.DirentType == keybase1.DirentType_FILE || 321 e.DirentType == keybase1.DirentType_EXEC { 322 bytesTotal += int64(e.Size) 323 } 324 } 325 326 func() { 327 m.mu.Lock() 328 defer m.mu.Unlock() 329 330 jobCopy, ok := m.state.Jobs[jobID] 331 if !ok { 332 m.simpleFS.log.CWarningf(ctx, "job %s not found. it might have been canceled", jobID) 333 return 334 } 335 jobCopy.Manifest = manifest 336 jobCopy.BytesTotal = bytesTotal 337 m.state.Jobs[jobID] = jobCopy 338 m.signal(m.notifyUIStateChangeSignal) 339 }() 340 return nil 341 } 342 343 func (m *archiveManager) indexingWorker(ctx context.Context) { 344 for { 345 select { 346 case <-ctx.Done(): 347 return 348 case <-m.indexingWorkerSignal: 349 } 350 351 jobID, jobCtx, ok := m.startWorkerTask(ctx, 352 keybase1.SimpleFSArchiveJobPhase_Queued, 353 keybase1.SimpleFSArchiveJobPhase_Indexing) 354 355 if !ok { 356 continue 357 } 358 // We got a task. Put another token into the signal channel so we 359 // check again on the next iteration. 360 m.signal(m.indexingWorkerSignal) 361 362 m.simpleFS.log.CDebugf(ctx, "indexing: %s", jobID) 363 364 err := m.doIndexing(jobCtx, jobID) 365 if err == nil { 366 m.simpleFS.log.CDebugf(jobCtx, "indexing done on job %s", jobID) 367 m.changeJobPhase(jobCtx, jobID, keybase1.SimpleFSArchiveJobPhase_Indexed) 368 m.signal(m.copyingWorkerSignal) // Done indexing! Notify the copying worker. 369 } else { 370 m.simpleFS.log.CErrorf(jobCtx, "indexing error on job %s: %v", jobID, err) 371 m.setJobError(ctx, jobID, err) 372 } 373 374 err = m.flushStateFile(ctx) 375 if err != nil { 376 m.simpleFS.log.CWarningf(ctx, "m.flushStateFileLocked error: %v", err) 377 } 378 } 379 } 380 381 type sha256TeeReader struct { 382 inner io.Reader 383 innerTeeReader io.Reader 384 h hash.Hash 385 } 386 387 var _ io.Reader = (*sha256TeeReader)(nil) 388 389 // Read implements the io.Reader interface. 390 func (r *sha256TeeReader) Read(p []byte) (n int, err error) { 391 return r.innerTeeReader.Read(p) 392 } 393 394 func (r *sha256TeeReader) getSum() []byte { 395 return r.h.Sum(nil) 396 } 397 398 func newSHA256TeeReader(inner io.Reader) (r *sha256TeeReader) { 399 r = &sha256TeeReader{ 400 inner: inner, 401 h: sha256.New(), 402 } 403 r.innerTeeReader = io.TeeReader(r.inner, r.h) 404 return r 405 } 406 407 type bytesUpdaterFunc = func(delta int64) 408 409 func ctxAwareCopy( 410 ctx context.Context, to io.Writer, from io.Reader, 411 bytesUpdater bytesUpdaterFunc) error { 412 for { 413 select { 414 case <-ctx.Done(): 415 return ctx.Err() 416 default: 417 } 418 n, err := io.CopyN(to, from, 64*1024) 419 switch err { 420 case nil: 421 bytesUpdater(n) 422 case io.EOF: 423 bytesUpdater(n) 424 return nil 425 default: 426 return err 427 } 428 } 429 } 430 431 func (m *archiveManager) copyFileFromBeginning(ctx context.Context, 432 srcDirFS billy.Filesystem, entryPathWithinJob string, 433 localPath string, mode os.FileMode, 434 bytesCopiedUpdater bytesUpdaterFunc) (sha256Sum []byte, err error) { 435 m.simpleFS.log.CDebugf(ctx, "+ copyFileFromBeginning %s", entryPathWithinJob) 436 defer func() { m.simpleFS.log.CDebugf(ctx, "- copyFileFromBeginning %s err: %v", entryPathWithinJob, err) }() 437 438 src, err := srcDirFS.Open(entryPathWithinJob) 439 if err != nil { 440 return nil, fmt.Errorf("srcDirFS.Open(%s) error: %v", entryPathWithinJob, err) 441 } 442 defer src.Close() 443 444 dst, err := os.OpenFile(localPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, mode) 445 if err != nil { 446 return nil, fmt.Errorf("os.OpenFile(%s) error: %v", localPath, err) 447 } 448 defer dst.Close() 449 450 teeReader := newSHA256TeeReader(src) 451 452 err = ctxAwareCopy(ctx, dst, teeReader, bytesCopiedUpdater) 453 if err != nil { 454 return nil, fmt.Errorf("[%s] io.CopyN error: %v", entryPathWithinJob, err) 455 } 456 457 // We didn't continue from a previously interrupted copy, so don't 458 // bother verifying the sha256sum and just return it. 459 return teeReader.getSum(), nil 460 } 461 462 func (m *archiveManager) copyFilePickupPrevious(ctx context.Context, 463 srcDirFS billy.Filesystem, entryPathWithinJob string, 464 localPath string, srcSeekOffset int64, mode os.FileMode, 465 bytesCopiedUpdater bytesUpdaterFunc) (sha256Sum []byte, err error) { 466 m.simpleFS.log.CDebugf(ctx, "+ copyFilePickupPrevious %s", entryPathWithinJob) 467 defer func() { m.simpleFS.log.CDebugf(ctx, "- copyFilePickupPrevious %s err: %v", entryPathWithinJob, err) }() 468 469 src, err := srcDirFS.Open(entryPathWithinJob) 470 if err != nil { 471 return nil, fmt.Errorf("srcDirFS.Open(%s) error: %v", entryPathWithinJob, err) 472 } 473 defer src.Close() 474 475 _, err = src.Seek(srcSeekOffset, io.SeekStart) 476 if err != nil { 477 return nil, fmt.Errorf("[%s] src.Seek error: %v", entryPathWithinJob, err) 478 } 479 480 // Copy the file. 481 if err = func() error { 482 dst, err := os.OpenFile(localPath, os.O_APPEND|os.O_WRONLY|os.O_CREATE, mode) 483 if err != nil { 484 return fmt.Errorf("os.OpenFile(%s) error: %v", localPath, err) 485 } 486 defer dst.Close() 487 488 err = ctxAwareCopy(ctx, dst, src, bytesCopiedUpdater) 489 if err != nil { 490 return fmt.Errorf("[%s] io.CopyN error: %v", entryPathWithinJob, err) 491 } 492 493 return nil 494 }(); err != nil { 495 return nil, err 496 } 497 498 var size int64 499 // Calculate sha256 and check the sha256 of the copied file since we 500 // continued from a previously interrupted copy. 501 srcSHA256Sum, dstSHA256Sum, err := func() (srcSHA256Sum, dstSHA256Sum []byte, err error) { 502 _, err = src.Seek(0, io.SeekStart) 503 if err != nil { 504 return nil, nil, fmt.Errorf("[%s] src.Seek error: %v", entryPathWithinJob, err) 505 } 506 srcSHA256SumHasher := sha256.New() 507 size, err = io.Copy(srcSHA256SumHasher, src) 508 if err != nil { 509 return nil, nil, fmt.Errorf("[%s] io.Copy error: %v", entryPathWithinJob, err) 510 } 511 srcSHA256Sum = srcSHA256SumHasher.Sum(nil) 512 513 dst, err := os.Open(localPath) 514 if err != nil { 515 return nil, nil, fmt.Errorf("os.Open(%s) error: %v", localPath, err) 516 } 517 defer dst.Close() 518 dstSHA256SumHasher := sha256.New() 519 _, err = io.Copy(dstSHA256SumHasher, dst) 520 if err != nil { 521 return nil, nil, fmt.Errorf("[%s] io.Copy error: %v", entryPathWithinJob, err) 522 } 523 dstSHA256Sum = dstSHA256SumHasher.Sum(nil) 524 525 return srcSHA256Sum, dstSHA256Sum, nil 526 }() 527 if err != nil { 528 return nil, err 529 } 530 531 if !bytes.Equal(srcSHA256Sum, dstSHA256Sum) { 532 m.simpleFS.log.CInfof(ctx, 533 "file corruption is detected from a previous copy. Will copy from the beginning: ", 534 entryPathWithinJob) 535 bytesCopiedUpdater(-size) 536 return m.copyFileFromBeginning(ctx, srcDirFS, entryPathWithinJob, localPath, mode, bytesCopiedUpdater) 537 } 538 539 return srcSHA256Sum, nil 540 } 541 542 func (m *archiveManager) copyFile(ctx context.Context, 543 srcDirFS billy.Filesystem, entryPathWithinJob string, 544 localPath string, srcSeekOffset int64, mode os.FileMode, 545 bytesCopiedUpdater bytesUpdaterFunc) (sha256Sum []byte, err error) { 546 if srcSeekOffset == 0 { 547 return m.copyFileFromBeginning(ctx, srcDirFS, entryPathWithinJob, localPath, mode, bytesCopiedUpdater) 548 } 549 return m.copyFilePickupPrevious(ctx, srcDirFS, entryPathWithinJob, localPath, srcSeekOffset, mode, bytesCopiedUpdater) 550 } 551 552 func getWorkspaceDir(jobDesc keybase1.SimpleFSArchiveJobDesc) string { 553 return filepath.Join(jobDesc.StagingPath, "workspace") 554 } 555 556 func (m *archiveManager) doCopying(ctx context.Context, jobID string) (err error) { 557 m.simpleFS.log.CDebugf(ctx, "+ doCopying %s", jobID) 558 defer func() { m.simpleFS.log.CDebugf(ctx, "- doCopying %s err: %v", jobID, err) }() 559 560 desc, manifest := func() (keybase1.SimpleFSArchiveJobDesc, map[string]keybase1.SimpleFSArchiveFile) { 561 m.mu.Lock() 562 defer m.mu.Unlock() 563 manifest := make(map[string]keybase1.SimpleFSArchiveFile) 564 for k, v := range m.state.Jobs[jobID].Manifest { 565 manifest[k] = v.DeepCopy() 566 } 567 return m.state.Jobs[jobID].Desc, manifest 568 }() 569 570 updateManifest := func(manifest map[string]keybase1.SimpleFSArchiveFile) { 571 m.mu.Lock() 572 defer m.mu.Unlock() 573 // Can override directly since only one worker can work on a give job at a time. 574 job := m.state.Jobs[jobID] 575 for k, v := range manifest { 576 job.Manifest[k] = v.DeepCopy() 577 } 578 m.state.Jobs[jobID] = job 579 m.signal(m.notifyUIStateChangeSignal) 580 } 581 582 updateBytesCopied := func(delta int64) { 583 m.mu.Lock() 584 defer m.mu.Unlock() 585 // Can override directly since only one worker can work on a give job at a time. 586 job := m.state.Jobs[jobID] 587 job.BytesCopied += delta 588 m.state.Jobs[jobID] = job 589 m.signal(m.notifyUIStateChangeSignal) 590 } 591 592 srcContainingDirFS, finalElem, err := m.simpleFS.getFSIfExists(ctx, 593 keybase1.NewPathWithKbfsArchived(desc.KbfsPathWithRevision)) 594 if err != nil { 595 return fmt.Errorf("getFSIfExists error: %v", err) 596 } 597 srcDirFS, err := srcContainingDirFS.Chroot(finalElem) 598 if err != nil { 599 return fmt.Errorf("srcContainingDirFS.Chroot error: %v", err) 600 } 601 dstBase := filepath.Join(getWorkspaceDir(desc), desc.TargetName) 602 603 entryPaths := make([]string, 0, len(manifest)) 604 for entryPathWithinJob := range manifest { 605 entryPaths = append(entryPaths, entryPathWithinJob) 606 } 607 sort.Strings(entryPaths) 608 609 loopEntryPaths: 610 for _, entryPathWithinJob := range entryPaths { 611 entry := manifest[entryPathWithinJob] 612 entry.State = keybase1.SimpleFSFileArchiveState_InProgress 613 manifest[entryPathWithinJob] = entry 614 updateManifest(manifest) 615 616 localPath := filepath.Join(dstBase, entryPathWithinJob) 617 srcFI, err := srcDirFS.Lstat(entryPathWithinJob) 618 if err != nil { 619 return fmt.Errorf("srcDirFS.LStat(%s) error: %v", entryPathWithinJob, err) 620 } 621 switch { 622 case srcFI.IsDir(): 623 err = os.MkdirAll(localPath, 0755) 624 if err != nil { 625 return fmt.Errorf("os.MkdirAll(%s) error: %v", localPath, err) 626 } 627 err = os.Chtimes(localPath, time.Time{}, srcFI.ModTime()) 628 if err != nil { 629 return fmt.Errorf("os.Chtimes(%s) error: %v", localPath, err) 630 } 631 entry.State = keybase1.SimpleFSFileArchiveState_Complete 632 manifest[entryPathWithinJob] = entry 633 case srcFI.Mode()&os.ModeSymlink != 0: // symlink 634 err = os.MkdirAll(filepath.Dir(localPath), 0755) 635 if err != nil { 636 return fmt.Errorf("os.MkdirAll(filepath.Dir(%s)) error: %v", localPath, err) 637 } 638 // Call Stat, which follows symlinks, to make sure the link doesn't 639 // escape outside the srcDirFS. 640 _, err = srcDirFS.Stat(entryPathWithinJob) 641 if err != nil { 642 m.simpleFS.log.CWarningf(ctx, "skipping %s due to srcDirFS.Stat error: %v", entryPathWithinJob, err) 643 entry.State = keybase1.SimpleFSFileArchiveState_Skipped 644 manifest[entryPathWithinJob] = entry 645 continue loopEntryPaths 646 } 647 648 link, err := srcDirFS.Readlink(entryPathWithinJob) 649 if err != nil { 650 return fmt.Errorf("srcDirFS(%s) error: %v", entryPathWithinJob, err) 651 } 652 m.simpleFS.log.CInfof(ctx, "calling os.Symlink(%s, %s) ", link, localPath) 653 err = os.Symlink(link, localPath) 654 if err != nil { 655 return fmt.Errorf("os.Symlink(%s, %s) error: %v", link, localPath, err) 656 } 657 // Skipping Chtimes becasue there doesn't seem to be a way to 658 // change time on symlinks. 659 entry.State = keybase1.SimpleFSFileArchiveState_Complete 660 manifest[entryPathWithinJob] = entry 661 default: 662 err = os.MkdirAll(filepath.Dir(localPath), 0755) 663 if err != nil { 664 return fmt.Errorf("os.MkdirAll(filepath.Dir(%s)) error: %v", localPath, err) 665 } 666 667 var mode os.FileMode = 0644 668 if srcFI.Mode()&0100 != 0 { 669 mode = 0755 670 } 671 672 seek := int64(0) 673 674 dstFI, err := os.Lstat(localPath) 675 switch { 676 case os.IsNotExist(err): // simple copy from the start of file 677 case err == nil: // continue from a previously interrupted copy 678 if srcFI.Mode()&os.ModeSymlink == 0 { 679 seek = dstFI.Size() 680 } 681 // otherwise copy from the start of file 682 default: 683 return fmt.Errorf("os.Lstat(%s) error: %v", localPath, err) 684 } 685 686 sha256Sum, err := m.copyFile(ctx, 687 srcDirFS, entryPathWithinJob, localPath, seek, mode, updateBytesCopied) 688 if err != nil { 689 return err 690 } 691 692 err = os.Chtimes(localPath, time.Time{}, srcFI.ModTime()) 693 if err != nil { 694 return fmt.Errorf("os.Chtimes(%s) error: %v", localPath, err) 695 } 696 697 entry.Sha256SumHex = hex.EncodeToString(sha256Sum) 698 entry.State = keybase1.SimpleFSFileArchiveState_Complete 699 manifest[entryPathWithinJob] = entry 700 } 701 updateManifest(manifest) 702 } 703 704 return nil 705 } 706 707 func (m *archiveManager) copyingWorker(ctx context.Context) { 708 for { 709 select { 710 case <-ctx.Done(): 711 return 712 case <-m.copyingWorkerSignal: 713 } 714 715 jobID, jobCtx, ok := m.startWorkerTask(ctx, 716 keybase1.SimpleFSArchiveJobPhase_Indexed, 717 keybase1.SimpleFSArchiveJobPhase_Copying) 718 719 if !ok { 720 continue 721 } 722 // We got a task. Put another token into the signal channel so we 723 // check again on the next iteration. 724 m.signal(m.copyingWorkerSignal) 725 726 m.simpleFS.log.CDebugf(ctx, "copying: %s", jobID) 727 728 err := m.doCopying(jobCtx, jobID) 729 if err == nil { 730 m.simpleFS.log.CDebugf(jobCtx, "copying done on job %s", jobID) 731 m.changeJobPhase(jobCtx, jobID, keybase1.SimpleFSArchiveJobPhase_Copied) 732 m.signal(m.zippingWorkerSignal) // Done copying! Notify the zipping worker. 733 } else { 734 m.simpleFS.log.CErrorf(jobCtx, "copying error on job %s: %v", jobID, err) 735 m.setJobError(ctx, jobID, err) 736 } 737 738 err = m.flushStateFile(ctx) 739 if err != nil { 740 m.simpleFS.log.CWarningf(ctx, "m.flushStateFileLocked error: %v", err) 741 } 742 } 743 } 744 745 // zipWriterAddDir is adapted from zip.Writer.AddFS in go1.22.0 source because 1) we're 746 // not on a version with this function yet, and 2) Go's AddFS doesn't support 747 // symlinks; 3) we need bytesZippedUpdater here and we need to use CopyN for it. 748 func zipWriterAddDir(ctx context.Context, 749 w *zip.Writer, dirPath string, bytesZippedUpdater bytesUpdaterFunc) error { 750 fsys := os.DirFS(dirPath) 751 return fs.WalkDir(fsys, ".", func(name string, d fs.DirEntry, err error) error { 752 if err != nil { 753 return err 754 } 755 if d.IsDir() { 756 return nil 757 } 758 info, err := d.Info() 759 if err != nil { 760 return err 761 } 762 if !(info.Mode() &^ fs.ModeSymlink).IsRegular() { 763 return errors.New("zip: cannot add non-regular file except symlink") 764 } 765 h, err := zip.FileInfoHeader(info) 766 if err != nil { 767 return err 768 } 769 h.Name = name 770 h.Method = zip.Deflate 771 fw, err := w.CreateHeader(h) 772 if err != nil { 773 return err 774 } 775 switch { 776 case info.Mode()&fs.ModeSymlink != 0: 777 target, err := os.Readlink(filepath.Join(dirPath, name)) 778 if err != nil { 779 return err 780 } 781 _, err = fw.Write([]byte(filepath.ToSlash(target))) 782 if err != nil { 783 return err 784 } 785 return nil 786 default: 787 f, err := fsys.Open(name) 788 if err != nil { 789 return err 790 } 791 defer f.Close() 792 ctxAwareCopy(ctx, fw, f, bytesZippedUpdater) 793 return nil 794 } 795 }) 796 } 797 798 func (m *archiveManager) doZipping(ctx context.Context, jobID string) (err error) { 799 m.simpleFS.log.CDebugf(ctx, "+ doZipping %s", jobID) 800 defer func() { m.simpleFS.log.CDebugf(ctx, "- doZipping %s err: %v", jobID, err) }() 801 802 jobDesc, manifestBytes, err := func() (keybase1.SimpleFSArchiveJobDesc, []byte, error) { 803 m.mu.Lock() 804 defer m.mu.Unlock() 805 manifestBytes, err := json.MarshalIndent(m.state.Jobs[jobID].Manifest, "", " ") 806 return m.state.Jobs[jobID].Desc, manifestBytes, err 807 }() 808 if err != nil { 809 return fmt.Errorf( 810 "getting jobDesc and manifestBytes for %s error: %v", jobID, err) 811 } 812 813 // Reset BytesZipped. 814 func() { 815 m.mu.Lock() 816 defer m.mu.Unlock() 817 // Can override directly since only one worker can work on a give job at a time. 818 job := m.state.Jobs[jobID] 819 job.BytesZipped = 0 820 m.state.Jobs[jobID] = job 821 m.signal(m.notifyUIStateChangeSignal) 822 }() 823 824 updateBytesZipped := func(delta int64) { 825 m.mu.Lock() 826 defer m.mu.Unlock() 827 // Can override directly since only one worker can work on a give job at a time. 828 job := m.state.Jobs[jobID] 829 job.BytesZipped += delta 830 m.state.Jobs[jobID] = job 831 m.signal(m.notifyUIStateChangeSignal) 832 } 833 834 workspaceDir := getWorkspaceDir(jobDesc) 835 836 err = func() (err error) { 837 mode := os.O_WRONLY | os.O_CREATE | os.O_EXCL 838 if jobDesc.OverwriteZip { 839 mode = os.O_WRONLY | os.O_CREATE | os.O_TRUNC 840 } 841 zipFile, err := os.OpenFile(jobDesc.ZipFilePath, mode, 0666) 842 if err != nil { 843 return fmt.Errorf("os.Create(%s) error: %v", jobDesc.ZipFilePath, err) 844 } 845 defer func() { 846 closeErr := zipFile.Close() 847 if err == nil { 848 err = closeErr 849 } 850 }() 851 852 zipWriter := zip.NewWriter(zipFile) 853 defer func() { 854 closeErr := zipWriter.Close() 855 if err == nil { 856 err = closeErr 857 } 858 }() 859 860 err = zipWriterAddDir(ctx, zipWriter, workspaceDir, updateBytesZipped) 861 if err != nil { 862 return fmt.Errorf("zipWriterAddDir into %s error: %v", jobDesc.ZipFilePath, err) 863 } 864 865 { // write the manifest 866 w, err := zipWriter.Create("manifest.json") 867 if err != nil { 868 return fmt.Errorf("zipWriter.Create into %s error: %v", jobDesc.ZipFilePath, err) 869 } 870 _, err = w.Write(manifestBytes) 871 if err != nil { 872 return fmt.Errorf("w.Write manifest into %s error: %v", jobDesc.ZipFilePath, err) 873 } 874 } 875 876 return nil 877 }() 878 if err != nil { 879 return err 880 } 881 882 // Remove the workspace so we release the storage space early on before 883 // user dismisses the job. 884 err = os.RemoveAll(workspaceDir) 885 if err != nil { 886 m.simpleFS.log.CWarningf(ctx, "removing workspace %s error %v", workspaceDir, err) 887 } 888 889 return nil 890 } 891 892 func (m *archiveManager) zippingWorker(ctx context.Context) { 893 for { 894 select { 895 case <-ctx.Done(): 896 return 897 case <-m.zippingWorkerSignal: 898 } 899 900 jobID, jobCtx, ok := m.startWorkerTask(ctx, 901 keybase1.SimpleFSArchiveJobPhase_Copied, 902 keybase1.SimpleFSArchiveJobPhase_Zipping) 903 904 if !ok { 905 continue 906 } 907 // We got a task. Put another token into the signal channel so we 908 // check again on the next iteration. 909 m.signal(m.zippingWorkerSignal) 910 911 m.simpleFS.log.CDebugf(ctx, "zipping: %s", jobID) 912 913 err := m.doZipping(jobCtx, jobID) 914 if err == nil { 915 m.simpleFS.log.CDebugf(jobCtx, "zipping done on job %s", jobID) 916 m.changeJobPhase(jobCtx, jobID, keybase1.SimpleFSArchiveJobPhase_Done) 917 } else { 918 m.simpleFS.log.CErrorf(jobCtx, "zipping error on job %s: %v", jobID, err) 919 m.setJobError(ctx, jobID, err) 920 } 921 922 err = m.flushStateFile(ctx) 923 if err != nil { 924 m.simpleFS.log.CWarningf(ctx, "m.flushStateFileLocked error: %v", err) 925 } 926 } 927 } 928 929 func (m *archiveManager) resetInterruptedPhaseLocked(ctx context.Context, jobID string) (changed bool) { 930 switch m.state.Jobs[jobID].Phase { 931 case keybase1.SimpleFSArchiveJobPhase_Indexing: 932 m.simpleFS.log.CDebugf(ctx, "resetting %s phase from %s to %s", jobID, 933 keybase1.SimpleFSArchiveJobPhase_Indexing, 934 keybase1.SimpleFSArchiveJobPhase_Queued) 935 m.changeJobPhaseLocked(ctx, jobID, 936 keybase1.SimpleFSArchiveJobPhase_Queued) 937 return true 938 case keybase1.SimpleFSArchiveJobPhase_Copying: 939 m.simpleFS.log.CDebugf(ctx, "resetting %s phase from %s to %s", jobID, 940 keybase1.SimpleFSArchiveJobPhase_Copying, 941 keybase1.SimpleFSArchiveJobPhase_Indexed) 942 m.changeJobPhaseLocked(ctx, jobID, 943 keybase1.SimpleFSArchiveJobPhase_Indexed) 944 return true 945 case keybase1.SimpleFSArchiveJobPhase_Zipping: 946 m.simpleFS.log.CDebugf(ctx, "resetting %s phase from %s to %s", jobID, 947 keybase1.SimpleFSArchiveJobPhase_Zipping, 948 keybase1.SimpleFSArchiveJobPhase_Copied) 949 m.changeJobPhaseLocked(ctx, jobID, 950 keybase1.SimpleFSArchiveJobPhase_Copied) 951 return true 952 default: 953 m.simpleFS.log.CDebugf(ctx, "not resetting %s phase from %s", jobID, 954 m.state.Jobs[jobID].Phase) 955 return false 956 } 957 } 958 959 func (m *archiveManager) errorRetryWorker(ctx context.Context) { 960 ticker := time.NewTicker(time.Second * 5) 961 for { 962 select { 963 case <-ctx.Done(): 964 return 965 case <-ticker.C: 966 } 967 968 func() { 969 m.mu.Lock() 970 defer m.mu.Unlock() 971 jobIDs := make([]string, len(m.state.Jobs)) 972 for jobID := range m.state.Jobs { 973 jobIDs = append(jobIDs, jobID) 974 } 975 loopJobIDs: 976 for _, jobID := range jobIDs { 977 errState, ok := m.errors[jobID] 978 if !ok { 979 continue loopJobIDs 980 } 981 if time.Now().Before(errState.nextRetry) { 982 continue loopJobIDs 983 } 984 m.simpleFS.log.CDebugf(ctx, "retrying job %s", jobID) 985 changed := m.resetInterruptedPhaseLocked(ctx, jobID) 986 if !changed { 987 m.simpleFS.log.CWarningf(ctx, 988 "job %s has an error state %v but an unexpected job phase", 989 jobID, errState.err) 990 continue loopJobIDs 991 } 992 delete(m.errors, jobID) 993 994 m.signal(m.indexingWorkerSignal) 995 m.signal(m.copyingWorkerSignal) 996 m.signal(m.zippingWorkerSignal) 997 } 998 }() 999 } 1000 } 1001 1002 func (m *archiveManager) notifyUIStateChangeWorker(ctx context.Context) { 1003 limiter := rate.NewLimiter(rate.Every(time.Second/2), 1) 1004 for { 1005 select { 1006 case <-ctx.Done(): 1007 return 1008 case <-m.notifyUIStateChangeSignal: 1009 } 1010 limiter.Wait(ctx) 1011 1012 m.notifyUIStateChange(ctx) 1013 } 1014 } 1015 1016 func (m *archiveManager) start() { 1017 ctx := context.Background() 1018 ctx, m.ctxCancel = context.WithCancel(ctx) 1019 go m.indexingWorker(m.simpleFS.makeContext(ctx)) 1020 go m.copyingWorker(m.simpleFS.makeContext(ctx)) 1021 go m.zippingWorker(m.simpleFS.makeContext(ctx)) 1022 go m.errorRetryWorker(m.simpleFS.makeContext(ctx)) 1023 go m.notifyUIStateChangeWorker(m.simpleFS.makeContext(ctx)) 1024 m.signal(m.indexingWorkerSignal) 1025 m.signal(m.copyingWorkerSignal) 1026 m.signal(m.zippingWorkerSignal) 1027 } 1028 1029 func (m *archiveManager) resetInterruptedPhasesLocked(ctx context.Context) { 1030 // We don't resume indexing and zipping work, so just reset them here. 1031 // Copying is resumable but we have per file state tracking so reset the 1032 // phase here as well. 1033 for jobID := range m.state.Jobs { 1034 _ = m.resetInterruptedPhaseLocked(ctx, jobID) 1035 } 1036 } 1037 1038 func newArchiveManager(simpleFS *SimpleFS) (m *archiveManager, err error) { 1039 ctx := context.Background() 1040 simpleFS.log.CDebugf(ctx, "+ newArchiveManager") 1041 defer simpleFS.log.CDebugf(ctx, "- newArchiveManager") 1042 m = &archiveManager{ 1043 simpleFS: simpleFS, 1044 jobCtxCancellers: make(map[string]func()), 1045 errors: make(map[string]errorState), 1046 indexingWorkerSignal: make(chan struct{}, 1), 1047 copyingWorkerSignal: make(chan struct{}, 1), 1048 zippingWorkerSignal: make(chan struct{}, 1), 1049 notifyUIStateChangeSignal: make(chan struct{}, 1), 1050 } 1051 stateFilePath := getStateFilePath(simpleFS) 1052 m.state, err = loadArchiveStateFromJsonGz(ctx, simpleFS, stateFilePath) 1053 switch err { 1054 case nil: 1055 if m.state.Jobs == nil { 1056 m.state.Jobs = make(map[string]keybase1.SimpleFSArchiveJobState) 1057 } 1058 m.resetInterruptedPhasesLocked(ctx) 1059 default: 1060 simpleFS.log.CErrorf(ctx, "loadArchiveStateFromJsonGz error ( %v ). Creating a new state.", err) 1061 m.state = &keybase1.SimpleFSArchiveState{ 1062 Jobs: make(map[string]keybase1.SimpleFSArchiveJobState), 1063 } 1064 err = writeArchiveStateIntoJsonGz(ctx, simpleFS, stateFilePath, m.state) 1065 if err != nil { 1066 simpleFS.log.CErrorf(ctx, "newArchiveManager: creating state file error: %v", err) 1067 return nil, err 1068 } 1069 } 1070 m.start() 1071 return m, nil 1072 }