github.com/keybase/client/go@v0.0.0-20240424154521-52f30ea26cb1/kbfs/simplefs/archive.go

// Copyright 2024 Keybase, Inc. All rights reserved. Use of
// this source code is governed by the included BSD license.

package simplefs

import (
	"archive/zip"
	"bytes"
	"compress/gzip"
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"hash"
	"io"
	"io/fs"
	"os"
	"path/filepath"
	"sort"
	"sync"
	"time"

	"golang.org/x/time/rate"

	"github.com/keybase/client/go/protocol/keybase1"
	"github.com/pkg/errors"
	"golang.org/x/net/context"
	"gopkg.in/src-d/go-billy.v4"
)

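// loadArchiveStateFromJsonGz loads a previously persisted archive state from
// the gzipped JSON file at filePath.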
func loadArchiveStateFromJsonGz(ctx context.Context, simpleFS *SimpleFS, filePath string) (state *keybase1.SimpleFSArchiveState, err error) {
	f, err := os.Open(filePath)
	if err != nil {
		simpleFS.log.CErrorf(ctx, "loadArchiveStateFromJsonGz: opening state file error: %v", err)
		return nil, err
	}
	defer f.Close()
	gzReader, err := gzip.NewReader(f)
	if err != nil {
		simpleFS.log.CErrorf(ctx, "loadArchiveStateFromJsonGz: creating gzip reader error: %v", err)
		return nil, err
	}
	defer gzReader.Close()
	decoder := json.NewDecoder(gzReader)
	err = decoder.Decode(&state)
	if err != nil {
		simpleFS.log.CErrorf(ctx, "loadArchiveStateFromJsonGz: decoding state file error: %v", err)
		return nil, err
	}
	return state, nil
}

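// writeArchiveStateIntoJsonGz persists the archive state s as gzipped JSON at
// filePath, creating parent directories as needed.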
func writeArchiveStateIntoJsonGz(ctx context.Context, simpleFS *SimpleFS, filePath string, s *keybase1.SimpleFSArchiveState) error {
	err := os.MkdirAll(filepath.Dir(filePath), 0755)
	if err != nil {
		simpleFS.log.CErrorf(ctx, "writeArchiveStateIntoJsonGz: os.MkdirAll error: %v", err)
		return err
	}
	f, err := os.Create(filePath)
	if err != nil {
		simpleFS.log.CErrorf(ctx, "writeArchiveStateIntoJsonGz: creating state file error: %v", err)
		return err
	}
	defer f.Close()

	gzWriter := gzip.NewWriter(f)
	defer gzWriter.Close()

	encoder := json.NewEncoder(gzWriter)
	err = encoder.Encode(s)
	if err != nil {
		simpleFS.log.CErrorf(ctx, "writeArchiveStateIntoJsonGz: encoding state file error: %v", err)
		return err
	}

	return nil
}

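// errorState records the last error seen on a job and the earliest time at
// which errorRetryWorker should retry it.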
type errorState struct {
	err       error
	nextRetry time.Time
}

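// archiveManager coordinates KBFS archive jobs. Each job moves through the
// phases Queued -> Indexing -> Indexed -> Copying -> Copied -> Zipping ->
// Done, driven by the indexing, copying, and zipping workers below, and its
// state is persisted to a gzipped JSON file so jobs survive restarts.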
type archiveManager struct {
	simpleFS *SimpleFS

	// Just use a regular mutex rather than a rw one so all writes to
	// persistent storage are synchronized.
	mu               sync.Mutex
	state            *keybase1.SimpleFSArchiveState
	jobCtxCancellers map[string]func()
	// jobID -> errorState. Populated when an error has happened. It's only
	// valid for these phases:
	//
	//   keybase1.SimpleFSArchiveJobPhase_Indexing
	//   keybase1.SimpleFSArchiveJobPhase_Copying
	//   keybase1.SimpleFSArchiveJobPhase_Zipping
	//
	// When nextRetry comes due, errorRetryWorker deletes the errorState from
	// this map and puts the job back into the previous phase so the relevant
	// worker can pick it up again.
	errors map[string]errorState

	indexingWorkerSignal      chan struct{}
	copyingWorkerSignal       chan struct{}
	zippingWorkerSignal       chan struct{}
	notifyUIStateChangeSignal chan struct{}

	ctxCancel func()
}

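// getStateFilePath returns the path of the per-user archive state file inside
// the cache dir.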
func getStateFilePath(simpleFS *SimpleFS) string {
	username := simpleFS.config.KbEnv().GetUsername()
	cacheDir := simpleFS.getCacheDir()
	return filepath.Join(cacheDir, fmt.Sprintf("kbfs-archive-%s.json.gz", username))
}

func (m *archiveManager) flushStateFileLocked(ctx context.Context) error {
	select {
	case <-ctx.Done():
		return ctx.Err()
	default:
	}
	err := writeArchiveStateIntoJsonGz(ctx, m.simpleFS, getStateFilePath(m.simpleFS), m.state)
	if err != nil {
		m.simpleFS.log.CErrorf(ctx,
			"archiveManager.flushStateFileLocked: writing state file error: %v", err)
		return err
	}
	return nil
}

func (m *archiveManager) flushStateFile(ctx context.Context) error {
	m.mu.Lock()
	defer m.mu.Unlock()
	return m.flushStateFileLocked(ctx)
}

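// signal performs a non-blocking send on ch. All the signal channels here
// have capacity 1, so a pending signal is never lost and duplicate signals
// collapse into one.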
func (m *archiveManager) signal(ch chan struct{}) {
	select {
	case ch <- struct{}{}:
	default:
		// There's already a signal in the chan. Skipping this.
	}
}

func (m *archiveManager) shutdown(ctx context.Context) {
	// OK to cancel before flushStateFileLocked because we'll pass in the
	// shutdown ctx there.
	if m.ctxCancel != nil {
		m.ctxCancel()
	}

	m.mu.Lock()
	defer m.mu.Unlock()
	err := m.flushStateFileLocked(ctx)
	if err != nil {
		m.simpleFS.log.CWarningf(ctx, "m.flushStateFileLocked error: %v", err)
	}
}

func (m *archiveManager) notifyUIStateChange(ctx context.Context) {
	m.simpleFS.log.CDebugf(ctx, "+ archiveManager.notifyUIStateChange")
	defer m.simpleFS.log.CDebugf(ctx, "- archiveManager.notifyUIStateChange")
	m.mu.Lock()
	defer m.mu.Unlock()
	state, errorStates := m.getCurrentStateLocked(ctx)
	m.simpleFS.notifyUIStateChange(ctx, state, errorStates)
}

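// startJob registers a new archive job in the Queued phase and wakes the
// indexing worker. A hedged usage sketch (the field values are hypothetical,
// and the real entry point may differ):
//
//	err := m.startJob(ctx, keybase1.SimpleFSArchiveJobDesc{
//		JobID:       "job1",         // hypothetical ID
//		StagingPath: "/tmp/staging", // hypothetical staging dir
//	})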
func (m *archiveManager) startJob(ctx context.Context, job keybase1.SimpleFSArchiveJobDesc) error {
	m.simpleFS.log.CDebugf(ctx, "+ archiveManager.startJob %#+v", job)
	defer m.simpleFS.log.CDebugf(ctx, "- archiveManager.startJob")

	m.mu.Lock()
	defer m.mu.Unlock()
	if _, ok := m.state.Jobs[job.JobID]; ok {
		return errors.New("job ID already exists")
	}
	m.state.Jobs[job.JobID] = keybase1.SimpleFSArchiveJobState{
		Desc:  job,
		Phase: keybase1.SimpleFSArchiveJobPhase_Queued,
	}
	m.state.LastUpdated = keybase1.ToTime(time.Now())
	m.signal(m.notifyUIStateChangeSignal)
	m.signal(m.indexingWorkerSignal)
	return m.flushStateFileLocked(ctx)
}

func (m *archiveManager) cancelOrDismissJob(ctx context.Context,
	jobID string) (err error) {
	m.simpleFS.log.CDebugf(ctx, "+ archiveManager.cancelOrDismissJob %s", jobID)
	defer m.simpleFS.log.CDebugf(ctx, "- archiveManager.cancelOrDismissJob %s", jobID)
	m.mu.Lock()
	defer m.mu.Unlock()

	if cancel, ok := m.jobCtxCancellers[jobID]; ok {
		cancel()
		delete(m.jobCtxCancellers, jobID)
	}

	job, ok := m.state.Jobs[jobID]
	if !ok {
		return errors.New("job not found")
	}
	delete(m.state.Jobs, jobID)

	err = os.RemoveAll(job.Desc.StagingPath)
	if err != nil {
		m.simpleFS.log.CWarningf(ctx, "removing staging path %q for job %s error: %v",
			job.Desc.StagingPath, jobID, err)
	}

	m.signal(m.notifyUIStateChangeSignal)
	return nil
}

func (m *archiveManager) getCurrentStateLocked(ctx context.Context) (
	state keybase1.SimpleFSArchiveState, errorStates map[string]errorState) {
	errorStates = make(map[string]errorState)
	for jobID, errState := range m.errors {
		errorStates[jobID] = errState
	}
	return m.state.DeepCopy(), errorStates
}

func (m *archiveManager) getCurrentState(ctx context.Context) (
	state keybase1.SimpleFSArchiveState, errorStates map[string]errorState) {
	m.simpleFS.log.CDebugf(ctx, "+ archiveManager.getCurrentState")
	defer m.simpleFS.log.CDebugf(ctx, "- archiveManager.getCurrentState")
	m.mu.Lock()
	defer m.mu.Unlock()
	return m.getCurrentStateLocked(ctx)
}

func (m *archiveManager) changeJobPhaseLocked(ctx context.Context,
	jobID string, newPhase keybase1.SimpleFSArchiveJobPhase) {
	jobCopy, ok := m.state.Jobs[jobID]
	if !ok {
		m.simpleFS.log.CWarningf(ctx, "job %s not found; it might have been canceled", jobID)
		return
	}
	jobCopy.Phase = newPhase
	m.state.Jobs[jobID] = jobCopy
	m.signal(m.notifyUIStateChangeSignal)
}

func (m *archiveManager) changeJobPhase(ctx context.Context,
	jobID string, newPhase keybase1.SimpleFSArchiveJobPhase) {
	m.mu.Lock()
	defer m.mu.Unlock()
	m.changeJobPhaseLocked(ctx, jobID, newPhase)
}

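// startWorkerTask claims at most one job currently in eligiblePhase, moves it
// to newPhase, and returns a cancelable context for it. It returns ok=false
// when no job is eligible, in which case the calling worker goes back to
// waiting on its signal channel.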
func (m *archiveManager) startWorkerTask(ctx context.Context,
	eligiblePhase keybase1.SimpleFSArchiveJobPhase,
	newPhase keybase1.SimpleFSArchiveJobPhase) (jobID string, jobCtx context.Context, ok bool) {
	m.mu.Lock()
	defer m.mu.Unlock()
	for jobID := range m.state.Jobs {
		if m.state.Jobs[jobID].Phase == eligiblePhase {
			m.changeJobPhaseLocked(ctx, jobID, newPhase)
			// Only create the cancelable context once we've claimed a job;
			// creating it up front would leak the cancel func when no job is
			// eligible.
			jobCtx, cancel := context.WithCancel(ctx)
			m.jobCtxCancellers[jobID] = cancel
			return jobID, jobCtx, true
		}
	}
	return "", nil, false
}

const archiveErrorRetryDuration = time.Minute

func (m *archiveManager) setJobError(
	ctx context.Context, jobID string, err error) {
	m.mu.Lock()
	defer m.mu.Unlock()
	nextRetry := time.Now().Add(archiveErrorRetryDuration)
	m.simpleFS.log.CErrorf(ctx, "job %s nextRetry: %s", jobID, nextRetry)
	m.errors[jobID] = errorState{
		err:       err,
		nextRetry: nextRetry,
	}
}

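// doIndexing lists the job's KBFS path recursively and builds the initial
// manifest, recording each entry as ToDo and accumulating the total byte
// count of regular files.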
func (m *archiveManager) doIndexing(ctx context.Context, jobID string) (err error) {
	m.simpleFS.log.CDebugf(ctx, "+ doIndexing %s", jobID)
	defer func() { m.simpleFS.log.CDebugf(ctx, "- doIndexing %s err: %v", jobID, err) }()

	jobDesc := func() keybase1.SimpleFSArchiveJobDesc {
		m.mu.Lock()
		defer m.mu.Unlock()
		return m.state.Jobs[jobID].Desc
	}()
	opid, err := m.simpleFS.SimpleFSMakeOpid(ctx)
	if err != nil {
		return err
	}
	defer m.simpleFS.SimpleFSClose(ctx, opid)
	filter := keybase1.ListFilter_NO_FILTER
	err = m.simpleFS.SimpleFSListRecursive(ctx, keybase1.SimpleFSListRecursiveArg{
		OpID:   opid,
		Path:   keybase1.NewPathWithKbfsArchived(jobDesc.KbfsPathWithRevision),
		Filter: filter,
	})
	if err != nil {
		return err
	}
	err = m.simpleFS.SimpleFSWait(ctx, opid)
	if err != nil {
		return err
	}

	listResult, err := m.simpleFS.SimpleFSReadList(ctx, opid)
	if err != nil {
		return err
	}

	var bytesTotal int64
	manifest := make(map[string]keybase1.SimpleFSArchiveFile)
	for _, e := range listResult.Entries {
		manifest[e.Name] = keybase1.SimpleFSArchiveFile{
			State:      keybase1.SimpleFSFileArchiveState_ToDo,
			DirentType: e.DirentType,
		}
		if e.DirentType == keybase1.DirentType_FILE ||
			e.DirentType == keybase1.DirentType_EXEC {
			bytesTotal += int64(e.Size)
		}
	}

	func() {
		m.mu.Lock()
		defer m.mu.Unlock()

		jobCopy, ok := m.state.Jobs[jobID]
		if !ok {
			m.simpleFS.log.CWarningf(ctx, "job %s not found; it might have been canceled", jobID)
			return
		}
		jobCopy.Manifest = manifest
		jobCopy.BytesTotal = bytesTotal
		m.state.Jobs[jobID] = jobCopy
		m.signal(m.notifyUIStateChangeSignal)
	}()
	return nil
}

func (m *archiveManager) indexingWorker(ctx context.Context) {
	for {
		select {
		case <-ctx.Done():
			return
		case <-m.indexingWorkerSignal:
		}

		jobID, jobCtx, ok := m.startWorkerTask(ctx,
			keybase1.SimpleFSArchiveJobPhase_Queued,
			keybase1.SimpleFSArchiveJobPhase_Indexing)

		if !ok {
			continue
		}
		// We got a task. Put another token into the signal channel so we
		// check again on the next iteration.
		m.signal(m.indexingWorkerSignal)

		m.simpleFS.log.CDebugf(ctx, "indexing: %s", jobID)

		err := m.doIndexing(jobCtx, jobID)
		if err == nil {
			m.simpleFS.log.CDebugf(jobCtx, "indexing done on job %s", jobID)
			m.changeJobPhase(jobCtx, jobID, keybase1.SimpleFSArchiveJobPhase_Indexed)
			m.signal(m.copyingWorkerSignal) // Done indexing! Notify the copying worker.
		} else {
			m.simpleFS.log.CErrorf(jobCtx, "indexing error on job %s: %v", jobID, err)
			m.setJobError(ctx, jobID, err)
		}

		err = m.flushStateFile(ctx)
		if err != nil {
			m.simpleFS.log.CWarningf(ctx, "m.flushStateFile error: %v", err)
		}
	}
}

type sha256TeeReader struct {
	inner          io.Reader
	innerTeeReader io.Reader
	h              hash.Hash
}

var _ io.Reader = (*sha256TeeReader)(nil)

// Read implements the io.Reader interface.
func (r *sha256TeeReader) Read(p []byte) (n int, err error) {
	return r.innerTeeReader.Read(p)
}

func (r *sha256TeeReader) getSum() []byte {
	return r.h.Sum(nil)
}

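// newSHA256TeeReader wraps inner so that everything read through the returned
// reader is also fed into a SHA-256 hash, available via getSum once the copy
// finishes.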
func newSHA256TeeReader(inner io.Reader) (r *sha256TeeReader) {
	r = &sha256TeeReader{
		inner: inner,
		h:     sha256.New(),
	}
	r.innerTeeReader = io.TeeReader(r.inner, r.h)
	return r
}

type bytesUpdaterFunc = func(delta int64)

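// ctxAwareCopy copies from `from` to `to` in 64 KiB chunks, checking ctx for
// cancellation between chunks and reporting progress through bytesUpdater.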
func ctxAwareCopy(
	ctx context.Context, to io.Writer, from io.Reader,
	bytesUpdater bytesUpdaterFunc) error {
	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}
		n, err := io.CopyN(to, from, 64*1024)
		switch err {
		case nil:
			bytesUpdater(n)
		case io.EOF:
			bytesUpdater(n)
			return nil
		default:
			return err
		}
	}
}

func (m *archiveManager) copyFileFromBeginning(ctx context.Context,
	srcDirFS billy.Filesystem, entryPathWithinJob string,
	localPath string, mode os.FileMode,
	bytesCopiedUpdater bytesUpdaterFunc) (sha256Sum []byte, err error) {
	m.simpleFS.log.CDebugf(ctx, "+ copyFileFromBeginning %s", entryPathWithinJob)
	defer func() { m.simpleFS.log.CDebugf(ctx, "- copyFileFromBeginning %s err: %v", entryPathWithinJob, err) }()

	src, err := srcDirFS.Open(entryPathWithinJob)
	if err != nil {
		return nil, fmt.Errorf("srcDirFS.Open(%s) error: %v", entryPathWithinJob, err)
	}
	defer src.Close()

	dst, err := os.OpenFile(localPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, mode)
	if err != nil {
		return nil, fmt.Errorf("os.OpenFile(%s) error: %v", localPath, err)
	}
	defer dst.Close()

	teeReader := newSHA256TeeReader(src)

	err = ctxAwareCopy(ctx, dst, teeReader, bytesCopiedUpdater)
	if err != nil {
		return nil, fmt.Errorf("[%s] ctxAwareCopy error: %v", entryPathWithinJob, err)
	}

	// We didn't continue from a previously interrupted copy, so don't
	// bother verifying the sha256sum and just return it.
	return teeReader.getSum(), nil
}

func (m *archiveManager) copyFilePickupPrevious(ctx context.Context,
	srcDirFS billy.Filesystem, entryPathWithinJob string,
	localPath string, srcSeekOffset int64, mode os.FileMode,
	bytesCopiedUpdater bytesUpdaterFunc) (sha256Sum []byte, err error) {
	m.simpleFS.log.CDebugf(ctx, "+ copyFilePickupPrevious %s", entryPathWithinJob)
	defer func() { m.simpleFS.log.CDebugf(ctx, "- copyFilePickupPrevious %s err: %v", entryPathWithinJob, err) }()

	src, err := srcDirFS.Open(entryPathWithinJob)
	if err != nil {
		return nil, fmt.Errorf("srcDirFS.Open(%s) error: %v", entryPathWithinJob, err)
	}
	defer src.Close()

	_, err = src.Seek(srcSeekOffset, io.SeekStart)
	if err != nil {
		return nil, fmt.Errorf("[%s] src.Seek error: %v", entryPathWithinJob, err)
	}

	// Copy the file.
	if err = func() error {
		dst, err := os.OpenFile(localPath, os.O_APPEND|os.O_WRONLY|os.O_CREATE, mode)
		if err != nil {
			return fmt.Errorf("os.OpenFile(%s) error: %v", localPath, err)
		}
		defer dst.Close()

		err = ctxAwareCopy(ctx, dst, src, bytesCopiedUpdater)
		if err != nil {
			return fmt.Errorf("[%s] ctxAwareCopy error: %v", entryPathWithinJob, err)
		}

		return nil
	}(); err != nil {
		return nil, err
	}

	var size int64
	// Calculate the sha256 of both files and compare them, since we continued
	// from a previously interrupted copy.
	srcSHA256Sum, dstSHA256Sum, err := func() (srcSHA256Sum, dstSHA256Sum []byte, err error) {
		_, err = src.Seek(0, io.SeekStart)
		if err != nil {
			return nil, nil, fmt.Errorf("[%s] src.Seek error: %v", entryPathWithinJob, err)
		}
		srcSHA256SumHasher := sha256.New()
		size, err = io.Copy(srcSHA256SumHasher, src)
		if err != nil {
			return nil, nil, fmt.Errorf("[%s] io.Copy error: %v", entryPathWithinJob, err)
		}
		srcSHA256Sum = srcSHA256SumHasher.Sum(nil)

		dst, err := os.Open(localPath)
		if err != nil {
			return nil, nil, fmt.Errorf("os.Open(%s) error: %v", localPath, err)
		}
		defer dst.Close()
		dstSHA256SumHasher := sha256.New()
		_, err = io.Copy(dstSHA256SumHasher, dst)
		if err != nil {
			return nil, nil, fmt.Errorf("[%s] io.Copy error: %v", entryPathWithinJob, err)
		}
		dstSHA256Sum = dstSHA256SumHasher.Sum(nil)

		return srcSHA256Sum, dstSHA256Sum, nil
	}()
	if err != nil {
		return nil, err
	}

	if !bytes.Equal(srcSHA256Sum, dstSHA256Sum) {
		m.simpleFS.log.CInfof(ctx,
			"file corruption detected from a previous copy on %s; will copy from the beginning",
			entryPathWithinJob)
		bytesCopiedUpdater(-size)
		return m.copyFileFromBeginning(ctx, srcDirFS, entryPathWithinJob, localPath, mode, bytesCopiedUpdater)
	}

	return srcSHA256Sum, nil
}

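// copyFile copies one manifest entry to localPath. With srcSeekOffset == 0 it
// copies from the beginning and hashes on the fly; otherwise it resumes a
// previously interrupted copy and verifies the result by re-hashing both
// sides, falling back to a full copy on mismatch.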
func (m *archiveManager) copyFile(ctx context.Context,
	srcDirFS billy.Filesystem, entryPathWithinJob string,
	localPath string, srcSeekOffset int64, mode os.FileMode,
	bytesCopiedUpdater bytesUpdaterFunc) (sha256Sum []byte, err error) {
	if srcSeekOffset == 0 {
		return m.copyFileFromBeginning(ctx, srcDirFS, entryPathWithinJob, localPath, mode, bytesCopiedUpdater)
	}
	return m.copyFilePickupPrevious(ctx, srcDirFS, entryPathWithinJob, localPath, srcSeekOffset, mode, bytesCopiedUpdater)
}

func getWorkspaceDir(jobDesc keybase1.SimpleFSArchiveJobDesc) string {
	return filepath.Join(jobDesc.StagingPath, "workspace")
}

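// doCopying walks the job's manifest in sorted order and materializes each
// entry (directory, symlink, or file) under the workspace directory, updating
// per-entry state and byte progress as it goes.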
func (m *archiveManager) doCopying(ctx context.Context, jobID string) (err error) {
	m.simpleFS.log.CDebugf(ctx, "+ doCopying %s", jobID)
	defer func() { m.simpleFS.log.CDebugf(ctx, "- doCopying %s err: %v", jobID, err) }()

	desc, manifest := func() (keybase1.SimpleFSArchiveJobDesc, map[string]keybase1.SimpleFSArchiveFile) {
		m.mu.Lock()
		defer m.mu.Unlock()
		manifest := make(map[string]keybase1.SimpleFSArchiveFile)
		for k, v := range m.state.Jobs[jobID].Manifest {
			manifest[k] = v.DeepCopy()
		}
		return m.state.Jobs[jobID].Desc, manifest
	}()

	updateManifest := func(manifest map[string]keybase1.SimpleFSArchiveFile) {
		m.mu.Lock()
		defer m.mu.Unlock()
		// Can overwrite directly since only one worker can work on a given job at a time.
		job := m.state.Jobs[jobID]
		for k, v := range manifest {
			job.Manifest[k] = v.DeepCopy()
		}
		m.state.Jobs[jobID] = job
		m.signal(m.notifyUIStateChangeSignal)
	}

	updateBytesCopied := func(delta int64) {
		m.mu.Lock()
		defer m.mu.Unlock()
		// Can overwrite directly since only one worker can work on a given job at a time.
		job := m.state.Jobs[jobID]
		job.BytesCopied += delta
		m.state.Jobs[jobID] = job
		m.signal(m.notifyUIStateChangeSignal)
	}

	srcContainingDirFS, finalElem, err := m.simpleFS.getFSIfExists(ctx,
		keybase1.NewPathWithKbfsArchived(desc.KbfsPathWithRevision))
	if err != nil {
		return fmt.Errorf("getFSIfExists error: %v", err)
	}
	srcDirFS, err := srcContainingDirFS.Chroot(finalElem)
	if err != nil {
		return fmt.Errorf("srcContainingDirFS.Chroot error: %v", err)
	}
	dstBase := filepath.Join(getWorkspaceDir(desc), desc.TargetName)

	entryPaths := make([]string, 0, len(manifest))
	for entryPathWithinJob := range manifest {
		entryPaths = append(entryPaths, entryPathWithinJob)
	}
	sort.Strings(entryPaths)

loopEntryPaths:
	for _, entryPathWithinJob := range entryPaths {
		entry := manifest[entryPathWithinJob]
		entry.State = keybase1.SimpleFSFileArchiveState_InProgress
		manifest[entryPathWithinJob] = entry
		updateManifest(manifest)

		localPath := filepath.Join(dstBase, entryPathWithinJob)
		srcFI, err := srcDirFS.Lstat(entryPathWithinJob)
		if err != nil {
			return fmt.Errorf("srcDirFS.Lstat(%s) error: %v", entryPathWithinJob, err)
		}
		switch {
		case srcFI.IsDir():
			err = os.MkdirAll(localPath, 0755)
			if err != nil {
				return fmt.Errorf("os.MkdirAll(%s) error: %v", localPath, err)
			}
			err = os.Chtimes(localPath, time.Time{}, srcFI.ModTime())
			if err != nil {
				return fmt.Errorf("os.Chtimes(%s) error: %v", localPath, err)
			}
			entry.State = keybase1.SimpleFSFileArchiveState_Complete
			manifest[entryPathWithinJob] = entry
		case srcFI.Mode()&os.ModeSymlink != 0: // symlink
			err = os.MkdirAll(filepath.Dir(localPath), 0755)
			if err != nil {
				return fmt.Errorf("os.MkdirAll(filepath.Dir(%s)) error: %v", localPath, err)
			}
			// Call Stat, which follows symlinks, to make sure the link doesn't
			// escape outside the srcDirFS.
			_, err = srcDirFS.Stat(entryPathWithinJob)
			if err != nil {
				m.simpleFS.log.CWarningf(ctx, "skipping %s due to srcDirFS.Stat error: %v", entryPathWithinJob, err)
				entry.State = keybase1.SimpleFSFileArchiveState_Skipped
				manifest[entryPathWithinJob] = entry
				continue loopEntryPaths
			}

			link, err := srcDirFS.Readlink(entryPathWithinJob)
			if err != nil {
				return fmt.Errorf("srcDirFS.Readlink(%s) error: %v", entryPathWithinJob, err)
			}
			m.simpleFS.log.CInfof(ctx, "calling os.Symlink(%s, %s) ", link, localPath)
			err = os.Symlink(link, localPath)
			if err != nil {
				return fmt.Errorf("os.Symlink(%s, %s) error: %v", link, localPath, err)
			}
			// Skipping Chtimes because there doesn't seem to be a way to
			// change times on symlinks.
			entry.State = keybase1.SimpleFSFileArchiveState_Complete
			manifest[entryPathWithinJob] = entry
		default:
			err = os.MkdirAll(filepath.Dir(localPath), 0755)
			if err != nil {
				return fmt.Errorf("os.MkdirAll(filepath.Dir(%s)) error: %v", localPath, err)
			}

			var mode os.FileMode = 0644
			if srcFI.Mode()&0100 != 0 {
				mode = 0755
			}

			seek := int64(0)

			dstFI, err := os.Lstat(localPath)
			switch {
			case os.IsNotExist(err): // simple copy from the start of the file
			case err == nil: // continue from a previously interrupted copy
				// Note: we check the local (dst) file here; srcFI can't be a
				// symlink in this branch since that case is handled above.
				if dstFI.Mode()&os.ModeSymlink == 0 {
					seek = dstFI.Size()
				}
				// otherwise copy from the start of the file
			default:
				return fmt.Errorf("os.Lstat(%s) error: %v", localPath, err)
			}

			sha256Sum, err := m.copyFile(ctx,
				srcDirFS, entryPathWithinJob, localPath, seek, mode, updateBytesCopied)
			if err != nil {
				return err
			}

			err = os.Chtimes(localPath, time.Time{}, srcFI.ModTime())
			if err != nil {
				return fmt.Errorf("os.Chtimes(%s) error: %v", localPath, err)
			}

			entry.Sha256SumHex = hex.EncodeToString(sha256Sum)
			entry.State = keybase1.SimpleFSFileArchiveState_Complete
			manifest[entryPathWithinJob] = entry
		}
		updateManifest(manifest)
	}

	return nil
}

func (m *archiveManager) copyingWorker(ctx context.Context) {
	for {
		select {
		case <-ctx.Done():
			return
		case <-m.copyingWorkerSignal:
		}

		jobID, jobCtx, ok := m.startWorkerTask(ctx,
			keybase1.SimpleFSArchiveJobPhase_Indexed,
			keybase1.SimpleFSArchiveJobPhase_Copying)

		if !ok {
			continue
		}
		// We got a task. Put another token into the signal channel so we
		// check again on the next iteration.
		m.signal(m.copyingWorkerSignal)

		m.simpleFS.log.CDebugf(ctx, "copying: %s", jobID)

		err := m.doCopying(jobCtx, jobID)
		if err == nil {
			m.simpleFS.log.CDebugf(jobCtx, "copying done on job %s", jobID)
			m.changeJobPhase(jobCtx, jobID, keybase1.SimpleFSArchiveJobPhase_Copied)
			m.signal(m.zippingWorkerSignal) // Done copying! Notify the zipping worker.
		} else {
			m.simpleFS.log.CErrorf(jobCtx, "copying error on job %s: %v", jobID, err)
			m.setJobError(ctx, jobID, err)
		}

		err = m.flushStateFile(ctx)
		if err != nil {
			m.simpleFS.log.CWarningf(ctx, "m.flushStateFile error: %v", err)
		}
	}
}

// zipWriterAddDir is adapted from zip.Writer.AddFS in the go1.22.0 source
// because (1) we're not on a Go version that has AddFS yet; (2) Go's AddFS
// doesn't support symlinks; and (3) we need bytesZippedUpdater here, which
// requires copying through ctxAwareCopy.
func zipWriterAddDir(ctx context.Context,
	w *zip.Writer, dirPath string, bytesZippedUpdater bytesUpdaterFunc) error {
	fsys := os.DirFS(dirPath)
	return fs.WalkDir(fsys, ".", func(name string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if d.IsDir() {
			return nil
		}
		info, err := d.Info()
		if err != nil {
			return err
		}
		if !(info.Mode() &^ fs.ModeSymlink).IsRegular() {
			return errors.New("zip: cannot add non-regular file except symlink")
		}
		h, err := zip.FileInfoHeader(info)
		if err != nil {
			return err
		}
		h.Name = name
		h.Method = zip.Deflate
		fw, err := w.CreateHeader(h)
		if err != nil {
			return err
		}
		switch {
		case info.Mode()&fs.ModeSymlink != 0:
			// Store the symlink target as the entry's content.
			target, err := os.Readlink(filepath.Join(dirPath, name))
			if err != nil {
				return err
			}
			_, err = fw.Write([]byte(filepath.ToSlash(target)))
			if err != nil {
				return err
			}
			return nil
		default:
			f, err := fsys.Open(name)
			if err != nil {
				return err
			}
			defer f.Close()
			return ctxAwareCopy(ctx, fw, f, bytesZippedUpdater)
		}
	})
}

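// doZipping zips the job's workspace directory into jobDesc.ZipFilePath,
// appends a manifest.json describing the archived entries, and then removes
// the workspace to free storage early.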
func (m *archiveManager) doZipping(ctx context.Context, jobID string) (err error) {
	m.simpleFS.log.CDebugf(ctx, "+ doZipping %s", jobID)
	defer func() { m.simpleFS.log.CDebugf(ctx, "- doZipping %s err: %v", jobID, err) }()

	jobDesc, manifestBytes, err := func() (keybase1.SimpleFSArchiveJobDesc, []byte, error) {
		m.mu.Lock()
		defer m.mu.Unlock()
		manifestBytes, err := json.MarshalIndent(m.state.Jobs[jobID].Manifest, "", "  ")
		return m.state.Jobs[jobID].Desc, manifestBytes, err
	}()
	if err != nil {
		return fmt.Errorf(
			"getting jobDesc and manifestBytes for %s error: %v", jobID, err)
	}

	// Reset BytesZipped.
	func() {
		m.mu.Lock()
		defer m.mu.Unlock()
		// Can overwrite directly since only one worker can work on a given job at a time.
		job := m.state.Jobs[jobID]
		job.BytesZipped = 0
		m.state.Jobs[jobID] = job
		m.signal(m.notifyUIStateChangeSignal)
	}()

	updateBytesZipped := func(delta int64) {
		m.mu.Lock()
		defer m.mu.Unlock()
		// Can overwrite directly since only one worker can work on a given job at a time.
		job := m.state.Jobs[jobID]
		job.BytesZipped += delta
		m.state.Jobs[jobID] = job
		m.signal(m.notifyUIStateChangeSignal)
	}

	workspaceDir := getWorkspaceDir(jobDesc)

	err = func() (err error) {
		mode := os.O_WRONLY | os.O_CREATE | os.O_EXCL
		if jobDesc.OverwriteZip {
			mode = os.O_WRONLY | os.O_CREATE | os.O_TRUNC
		}
		zipFile, err := os.OpenFile(jobDesc.ZipFilePath, mode, 0666)
		if err != nil {
			return fmt.Errorf("os.OpenFile(%s) error: %v", jobDesc.ZipFilePath, err)
		}
		defer func() {
			closeErr := zipFile.Close()
			if err == nil {
				err = closeErr
			}
		}()

		zipWriter := zip.NewWriter(zipFile)
		defer func() {
			closeErr := zipWriter.Close()
			if err == nil {
				err = closeErr
			}
		}()

		err = zipWriterAddDir(ctx, zipWriter, workspaceDir, updateBytesZipped)
		if err != nil {
			return fmt.Errorf("zipWriterAddDir into %s error: %v", jobDesc.ZipFilePath, err)
		}

		{ // write the manifest
			w, err := zipWriter.Create("manifest.json")
			if err != nil {
				return fmt.Errorf("zipWriter.Create into %s error: %v", jobDesc.ZipFilePath, err)
			}
			_, err = w.Write(manifestBytes)
			if err != nil {
				return fmt.Errorf("w.Write manifest into %s error: %v", jobDesc.ZipFilePath, err)
			}
		}

		return nil
	}()
	if err != nil {
		return err
	}

	// Remove the workspace so we release the storage space early, before the
	// user dismisses the job.
	err = os.RemoveAll(workspaceDir)
	if err != nil {
		m.simpleFS.log.CWarningf(ctx, "removing workspace %s error %v", workspaceDir, err)
	}

	return nil
}

func (m *archiveManager) zippingWorker(ctx context.Context) {
	for {
		select {
		case <-ctx.Done():
			return
		case <-m.zippingWorkerSignal:
		}

		jobID, jobCtx, ok := m.startWorkerTask(ctx,
			keybase1.SimpleFSArchiveJobPhase_Copied,
			keybase1.SimpleFSArchiveJobPhase_Zipping)

		if !ok {
			continue
		}
		// We got a task. Put another token into the signal channel so we
		// check again on the next iteration.
		m.signal(m.zippingWorkerSignal)

		m.simpleFS.log.CDebugf(ctx, "zipping: %s", jobID)

		err := m.doZipping(jobCtx, jobID)
		if err == nil {
			m.simpleFS.log.CDebugf(jobCtx, "zipping done on job %s", jobID)
			m.changeJobPhase(jobCtx, jobID, keybase1.SimpleFSArchiveJobPhase_Done)
		} else {
			m.simpleFS.log.CErrorf(jobCtx, "zipping error on job %s: %v", jobID, err)
			m.setJobError(ctx, jobID, err)
		}

		err = m.flushStateFile(ctx)
		if err != nil {
			m.simpleFS.log.CWarningf(ctx, "m.flushStateFile error: %v", err)
		}
	}
}

func (m *archiveManager) resetInterruptedPhaseLocked(ctx context.Context, jobID string) (changed bool) {
	switch m.state.Jobs[jobID].Phase {
	case keybase1.SimpleFSArchiveJobPhase_Indexing:
		m.simpleFS.log.CDebugf(ctx, "resetting %s phase from %s to %s", jobID,
			keybase1.SimpleFSArchiveJobPhase_Indexing,
			keybase1.SimpleFSArchiveJobPhase_Queued)
		m.changeJobPhaseLocked(ctx, jobID,
			keybase1.SimpleFSArchiveJobPhase_Queued)
		return true
	case keybase1.SimpleFSArchiveJobPhase_Copying:
		m.simpleFS.log.CDebugf(ctx, "resetting %s phase from %s to %s", jobID,
			keybase1.SimpleFSArchiveJobPhase_Copying,
			keybase1.SimpleFSArchiveJobPhase_Indexed)
		m.changeJobPhaseLocked(ctx, jobID,
			keybase1.SimpleFSArchiveJobPhase_Indexed)
		return true
	case keybase1.SimpleFSArchiveJobPhase_Zipping:
		m.simpleFS.log.CDebugf(ctx, "resetting %s phase from %s to %s", jobID,
			keybase1.SimpleFSArchiveJobPhase_Zipping,
			keybase1.SimpleFSArchiveJobPhase_Copied)
		m.changeJobPhaseLocked(ctx, jobID,
			keybase1.SimpleFSArchiveJobPhase_Copied)
		return true
	default:
		m.simpleFS.log.CDebugf(ctx, "not resetting %s phase from %s", jobID,
			m.state.Jobs[jobID].Phase)
		return false
	}
}

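// errorRetryWorker periodically scans for jobs whose errorState has come due,
// resets them to the phase preceding the failure, and signals all workers so
// the job gets picked up again.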
func (m *archiveManager) errorRetryWorker(ctx context.Context) {
	ticker := time.NewTicker(time.Second * 5)
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
		}

		func() {
			m.mu.Lock()
			defer m.mu.Unlock()
			jobIDs := make([]string, 0, len(m.state.Jobs))
			for jobID := range m.state.Jobs {
				jobIDs = append(jobIDs, jobID)
			}
		loopJobIDs:
			for _, jobID := range jobIDs {
				errState, ok := m.errors[jobID]
				if !ok {
					continue loopJobIDs
				}
				if time.Now().Before(errState.nextRetry) {
					continue loopJobIDs
				}
				m.simpleFS.log.CDebugf(ctx, "retrying job %s", jobID)
				changed := m.resetInterruptedPhaseLocked(ctx, jobID)
				if !changed {
					m.simpleFS.log.CWarningf(ctx,
						"job %s has an error state %v but an unexpected job phase",
						jobID, errState.err)
					continue loopJobIDs
				}
				delete(m.errors, jobID)

				m.signal(m.indexingWorkerSignal)
				m.signal(m.copyingWorkerSignal)
				m.signal(m.zippingWorkerSignal)
			}
		}()
	}
}

func (m *archiveManager) notifyUIStateChangeWorker(ctx context.Context) {
	limiter := rate.NewLimiter(rate.Every(time.Second/2), 1)
	for {
		select {
		case <-ctx.Done():
			return
		case <-m.notifyUIStateChangeSignal:
		}
		if err := limiter.Wait(ctx); err != nil {
			return
		}

		m.notifyUIStateChange(ctx)
	}
}

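// start launches the worker goroutines and nudges each pipeline stage once so
// any jobs restored from the state file make progress immediately.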
func (m *archiveManager) start() {
	ctx := context.Background()
	ctx, m.ctxCancel = context.WithCancel(ctx)
	go m.indexingWorker(m.simpleFS.makeContext(ctx))
	go m.copyingWorker(m.simpleFS.makeContext(ctx))
	go m.zippingWorker(m.simpleFS.makeContext(ctx))
	go m.errorRetryWorker(m.simpleFS.makeContext(ctx))
	go m.notifyUIStateChangeWorker(m.simpleFS.makeContext(ctx))
	m.signal(m.indexingWorkerSignal)
	m.signal(m.copyingWorkerSignal)
	m.signal(m.zippingWorkerSignal)
}

func (m *archiveManager) resetInterruptedPhasesLocked(ctx context.Context) {
	// We don't resume indexing or zipping work, so just reset those phases
	// here. Copying is resumable, and since we track per-file state we can
	// safely reset that phase here as well.
	for jobID := range m.state.Jobs {
		_ = m.resetInterruptedPhaseLocked(ctx, jobID)
	}
}

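// newArchiveManager creates an archiveManager, restoring state from the
// per-user state file when possible (resetting any phases that were
// interrupted mid-run) and starting a fresh state file otherwise.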
func newArchiveManager(simpleFS *SimpleFS) (m *archiveManager, err error) {
	ctx := context.Background()
	simpleFS.log.CDebugf(ctx, "+ newArchiveManager")
	defer simpleFS.log.CDebugf(ctx, "- newArchiveManager")
	m = &archiveManager{
		simpleFS:                  simpleFS,
		jobCtxCancellers:          make(map[string]func()),
		errors:                    make(map[string]errorState),
		indexingWorkerSignal:      make(chan struct{}, 1),
		copyingWorkerSignal:       make(chan struct{}, 1),
		zippingWorkerSignal:       make(chan struct{}, 1),
		notifyUIStateChangeSignal: make(chan struct{}, 1),
	}
	stateFilePath := getStateFilePath(simpleFS)
	m.state, err = loadArchiveStateFromJsonGz(ctx, simpleFS, stateFilePath)
	switch err {
	case nil:
		if m.state.Jobs == nil {
			m.state.Jobs = make(map[string]keybase1.SimpleFSArchiveJobState)
		}
		m.resetInterruptedPhasesLocked(ctx)
	default:
		simpleFS.log.CErrorf(ctx, "loadArchiveStateFromJsonGz error (%v). Creating a new state.", err)
		m.state = &keybase1.SimpleFSArchiveState{
			Jobs: make(map[string]keybase1.SimpleFSArchiveJobState),
		}
		err = writeArchiveStateIntoJsonGz(ctx, simpleFS, stateFilePath, m.state)
		if err != nil {
			simpleFS.log.CErrorf(ctx, "newArchiveManager: creating state file error: %v", err)
			return nil, err
		}
	}
	m.start()
	return m, nil
}