github.com/keybase/client/go@v0.0.0-20240309051027-028f7c731f8b/chat/archive.go (about)

     1  package chat
     2  
     3  import (
     4  	"archive/tar"
     5  	"compress/gzip"
     6  	"context"
     7  	"errors"
     8  	"fmt"
     9  	"io"
    10  	"os"
    11  	"path"
    12  	"path/filepath"
    13  	"sort"
    14  	"sync"
    15  	"time"
    16  
    17  	"github.com/keybase/client/go/chat/attachments"
    18  	"github.com/keybase/client/go/chat/globals"
    19  	"github.com/keybase/client/go/chat/storage"
    20  	"github.com/keybase/client/go/chat/types"
    21  	"github.com/keybase/client/go/chat/utils"
    22  	"github.com/keybase/client/go/chatrender"
    23  	"github.com/keybase/client/go/encrypteddb"
    24  	"github.com/keybase/client/go/libkb"
    25  	"github.com/keybase/client/go/protocol/chat1"
    26  	"github.com/keybase/client/go/protocol/gregor1"
    27  	"github.com/keybase/client/go/protocol/keybase1"
    28  	"github.com/keybase/clockwork"
    29  	"golang.org/x/sync/errgroup"
    30  )
    31  
// ChatArchiveRegistry keeps the history of chat archive jobs, persists it
// through an encrypted DB, and holds the cancel functions of the jobs that
// are currently running.
type ChatArchiveRegistry struct {
	globals.Contextified
	utils.DebugLabeler
	// Methods of the registry take this lock before touching the state below.
	sync.Mutex

	// Set by Start, cleared by Stop.
	started bool
	// User the registry was started for; part of the DB key.
	uid gregor1.UID
	// Have we populated from disk?
	inited bool
	// Delay before we restart paused jobs on startup
	resumeJobsDelay time.Duration
	// Interval between periodic flushes in flushLoop.
	flushDelay time.Duration
	// Closed by Stop to shut down the background goroutines.
	stopCh chan struct{}
	clock  clockwork.Clock
	// Tracks the background goroutines launched by Start.
	eg errgroup.Group
	// Changes to flush to disk?
	dirty        bool
	remoteClient func() chat1.RemoteInterface
	// Cancel functions of the jobs currently running, keyed by job ID.
	runningJobs map[chat1.ArchiveJobID]types.CancelArchiveFn

	// Encrypted storage backing jobHistory.
	edb *encrypteddb.EncryptedDB
	// In-memory copy of the persisted job history.
	jobHistory chat1.ArchiveChatHistory
}
    55  
    56  type ArchiveJobNotFoundError struct {
    57  	jobID chat1.ArchiveJobID
    58  }
    59  
    60  func (e ArchiveJobNotFoundError) Error() string {
    61  	return fmt.Sprintf("job not found: %s", e.jobID)
    62  }
    63  
    64  func NewArchiveJobNotFoundError(jobID chat1.ArchiveJobID) ArchiveJobNotFoundError {
    65  	return ArchiveJobNotFoundError{jobID: jobID}
    66  }
    67  
    68  var _ error = ArchiveJobNotFoundError{}
    69  
    70  func NewChatArchiveRegistry(g *globals.Context, remoteClient func() chat1.RemoteInterface) *ChatArchiveRegistry {
    71  	keyFn := func(ctx context.Context) ([32]byte, error) {
    72  		return storage.GetSecretBoxKey(ctx, g.ExternalG())
    73  	}
    74  	dbFn := func(g *libkb.GlobalContext) *libkb.JSONLocalDb {
    75  		return g.LocalChatDb
    76  	}
    77  	r := &ChatArchiveRegistry{
    78  		Contextified: globals.NewContextified(g),
    79  		DebugLabeler: utils.NewDebugLabeler(g.ExternalG(), "ChatArchiveRegistry", false),
    80  		remoteClient: remoteClient,
    81  		clock:        clockwork.NewRealClock(),
    82  		flushDelay:   15 * time.Second,
    83  		runningJobs:  make(map[chat1.ArchiveJobID]types.CancelArchiveFn),
    84  		jobHistory:   chat1.ArchiveChatHistory{JobHistory: make(map[chat1.ArchiveJobID]chat1.ArchiveChatJob)},
    85  		edb:          encrypteddb.New(g.ExternalG(), dbFn, keyFn),
    86  	}
    87  	switch r.G().GetAppType() {
    88  	case libkb.MobileAppType:
    89  		r.resumeJobsDelay = 30 * time.Second
    90  	default:
    91  		r.resumeJobsDelay = 30 * time.Second
    92  	}
    93  	return r
    94  }
    95  
    96  func (r *ChatArchiveRegistry) dbKey() libkb.DbKey {
    97  	version := 0
    98  	key := fmt.Sprintf("ar:%d:%s", version, r.uid)
    99  	return libkb.DbKey{
   100  		Typ: libkb.DBChatArchiveRegistry,
   101  		Key: key,
   102  	}
   103  }
   104  
   105  func (r *ChatArchiveRegistry) initLocked(ctx context.Context) error {
   106  	if !r.started {
   107  		return errors.New("not started")
   108  	}
   109  	if r.inited {
   110  		return nil
   111  	}
   112  	found, err := r.edb.Get(ctx, r.dbKey(), &r.jobHistory)
   113  	if err != nil {
   114  		return err
   115  	}
   116  	if !found {
   117  		r.jobHistory = chat1.ArchiveChatHistory{JobHistory: make(map[chat1.ArchiveJobID]chat1.ArchiveChatJob)}
   118  	}
   119  	r.inited = true
   120  	return nil
   121  }
   122  
   123  func (r *ChatArchiveRegistry) flushLocked(ctx context.Context) error {
   124  	if r.dirty {
   125  		err := r.edb.Put(ctx, r.dbKey(), r.jobHistory)
   126  		if err != nil {
   127  			return err
   128  		}
   129  		r.dirty = false
   130  	}
   131  	return nil
   132  }
   133  
   134  func (r *ChatArchiveRegistry) flushLoop(stopCh chan struct{}) error {
   135  	ctx := context.Background()
   136  	r.Debug(ctx, "flushLoop: starting")
   137  	for {
   138  		select {
   139  		case <-stopCh:
   140  			r.Debug(ctx, "flushLoop: shutting down")
   141  			return nil
   142  		case <-r.clock.After(r.flushDelay):
   143  			func() {
   144  				r.Lock()
   145  				defer r.Unlock()
   146  				err := r.flushLocked(ctx)
   147  				if err != nil {
   148  					r.Debug(ctx, "flushLoop: failed to flush: %s", err)
   149  				}
   150  			}()
   151  		}
   152  	}
   153  }
   154  
   155  func (r *ChatArchiveRegistry) resumeAllBgJobs(ctx context.Context) error {
   156  	select {
   157  	case <-r.stopCh:
   158  		return nil
   159  	case <-ctx.Done():
   160  		return ctx.Err()
   161  	case <-time.After(r.resumeJobsDelay):
   162  	}
   163  	r.Lock()
   164  	defer r.Unlock()
   165  	err := r.initLocked(ctx)
   166  	if err != nil {
   167  		return err
   168  	}
   169  	for _, job := range r.jobHistory.JobHistory {
   170  		if job.Status == chat1.ArchiveChatJobStatus_BACKGROUND_PAUSED {
   171  			go func(job chat1.ArchiveChatJob) {
   172  				ctx := globals.ChatCtx(context.Background(), r.G(), keybase1.TLFIdentifyBehavior_CHAT_GUI, nil, NewSimpleIdentifyNotifier(r.G()))
   173  				_, err := NewChatArchiver(r.G(), r.uid, r.remoteClient).ArchiveChat(ctx, job.Request)
   174  				if err != nil {
   175  					r.Debug(ctx, err.Error())
   176  				}
   177  			}(job)
   178  		}
   179  	}
   180  	return nil
   181  }
   182  
   183  func (r *ChatArchiveRegistry) monitorAppState() error {
   184  	appState := keybase1.MobileAppState_FOREGROUND
   185  	ctx, cancel := context.WithCancel(context.Background())
   186  	for {
   187  		select {
   188  		case <-r.stopCh:
   189  			cancel()
   190  			return nil
   191  		case appState = <-r.G().MobileAppState.NextUpdate(&appState):
   192  			switch appState {
   193  			case keybase1.MobileAppState_FOREGROUND:
   194  				go func() {
   195  					ierr := r.resumeAllBgJobs(ctx)
   196  					if ierr != nil {
   197  						r.Debug(ctx, ierr.Error())
   198  					}
   199  				}()
   200  			default:
   201  				cancel()
   202  				ctx, cancel = context.WithCancel(context.Background())
   203  				r.bgPauseAllJobsLocked(ctx)
   204  			}
   205  		}
   206  	}
   207  }
   208  
   209  // Resumes previously BACKGROUND_PAUSED jobs, after a delay.
   210  func (r *ChatArchiveRegistry) Start(ctx context.Context, uid gregor1.UID) {
   211  	defer r.Trace(ctx, nil, "Start")()
   212  	r.Lock()
   213  	defer r.Unlock()
   214  	if r.started {
   215  		return
   216  	}
   217  	r.uid = uid
   218  	r.started = true
   219  	r.stopCh = make(chan struct{})
   220  	r.eg.Go(func() error {
   221  		return r.flushLoop(r.stopCh)
   222  	})
   223  	r.eg.Go(func() error {
   224  		return r.resumeAllBgJobs(context.Background())
   225  	})
   226  	r.eg.Go(r.monitorAppState)
   227  }
   228  
   229  func (r *ChatArchiveRegistry) bgPauseAllJobsLocked(ctx context.Context) {
   230  	for jobID, cancel := range r.runningJobs {
   231  		job := cancel()
   232  		job.Status = chat1.ArchiveChatJobStatus_BACKGROUND_PAUSED
   233  		r.jobHistory.JobHistory[jobID] = job
   234  	}
   235  	r.runningJobs = make(map[chat1.ArchiveJobID]func() chat1.ArchiveChatJob)
   236  
   237  	r.dirty = true
   238  	_ = r.flushLocked(ctx)
   239  }
   240  
   241  // Pause running jobs marking as BACKGROUND_PAUSED
   242  func (r *ChatArchiveRegistry) Stop(ctx context.Context) chan struct{} {
   243  	defer r.Trace(ctx, nil, "Stop")()
   244  	r.Lock()
   245  	defer r.Unlock()
   246  	ch := make(chan struct{})
   247  	if r.started {
   248  		r.started = false
   249  		r.bgPauseAllJobsLocked(ctx)
   250  		close(r.stopCh)
   251  		go func() {
   252  			r.Debug(context.Background(), "Stop: waiting for shutdown")
   253  			_ = r.eg.Wait()
   254  			r.Debug(context.Background(), "Stop: shutdown complete")
   255  			close(ch)
   256  		}()
   257  	} else {
   258  		close(ch)
   259  	}
   260  	return ch
   261  
   262  }
   263  
   264  func (r *ChatArchiveRegistry) OnDbNuke(mctx libkb.MetaContext) (err error) {
   265  	defer r.Trace(mctx.Ctx(), &err, "ChatArchiveRegistry.OnDbNuke")()
   266  	r.Lock()
   267  	defer r.Unlock()
   268  	if !r.started {
   269  		return nil
   270  	}
   271  	r.inited = false
   272  	return nil
   273  }
   274  
   275  type ByJobStartedAt []chat1.ArchiveChatJob
   276  
   277  func (c ByJobStartedAt) Len() int      { return len(c) }
   278  func (c ByJobStartedAt) Swap(i, j int) { c[i], c[j] = c[j], c[i] }
   279  func (c ByJobStartedAt) Less(i, j int) bool {
   280  	x := c[i]
   281  	y := c[j]
   282  	if x.StartedAt == y.StartedAt {
   283  		return x.Request.JobID < y.Request.JobID
   284  	}
   285  	return c[i].StartedAt.Before(c[j].StartedAt)
   286  }
   287  
   288  func (r *ChatArchiveRegistry) List(ctx context.Context) (res chat1.ArchiveChatListRes, err error) {
   289  	defer r.Trace(ctx, &err, "List")()
   290  	r.Lock()
   291  	defer r.Unlock()
   292  	err = r.initLocked(ctx)
   293  	if err != nil {
   294  		return res, err
   295  	}
   296  
   297  	for _, job := range r.jobHistory.JobHistory {
   298  		res.Jobs = append(res.Jobs, job)
   299  	}
   300  	sort.Sort(ByJobStartedAt(res.Jobs))
   301  	return res, nil
   302  }
   303  
   304  func (r *ChatArchiveRegistry) Get(ctx context.Context, jobID chat1.ArchiveJobID) (res chat1.ArchiveChatJob, err error) {
   305  	defer r.Trace(ctx, &err, "Get(%s)", jobID)()
   306  	r.Lock()
   307  	defer r.Unlock()
   308  	err = r.initLocked(ctx)
   309  	if err != nil {
   310  		return res, err
   311  	}
   312  
   313  	job, ok := r.jobHistory.JobHistory[jobID]
   314  	if !ok {
   315  		return res, NewArchiveJobNotFoundError(jobID)
   316  	}
   317  	return job, nil
   318  }
   319  
   320  func (r *ChatArchiveRegistry) Delete(ctx context.Context, jobID chat1.ArchiveJobID, deleteOutputPath bool) (err error) {
   321  	defer r.Trace(ctx, &err, "Delete(%s)", jobID)()
   322  	r.Lock()
   323  	defer r.Unlock()
   324  	err = r.initLocked(ctx)
   325  	if err != nil {
   326  		return err
   327  	}
   328  
   329  	cancel, ok := r.runningJobs[jobID]
   330  	if ok {
   331  		// Ignore the job output since we're deleting it anyway
   332  		cancel()
   333  		delete(r.runningJobs, jobID)
   334  	}
   335  	job, ok := r.jobHistory.JobHistory[jobID]
   336  	if !ok {
   337  		return NewArchiveJobNotFoundError(jobID)
   338  	}
   339  	delete(r.jobHistory.JobHistory, jobID)
   340  	r.dirty = true
   341  	if deleteOutputPath {
   342  		err = os.RemoveAll(job.Request.OutputPath)
   343  		if err != nil {
   344  			return err
   345  		}
   346  	}
   347  	return nil
   348  }
   349  
   350  func (r *ChatArchiveRegistry) Set(ctx context.Context, cancel types.CancelArchiveFn, job chat1.ArchiveChatJob) (err error) {
   351  	defer r.Trace(ctx, &err, "Set(%+v)", job)()
   352  	r.Lock()
   353  	defer r.Unlock()
   354  	err = r.initLocked(ctx)
   355  	if err != nil {
   356  		return err
   357  	}
   358  
   359  	jobID := job.Request.JobID
   360  	switch job.Status {
   361  	case chat1.ArchiveChatJobStatus_COMPLETE:
   362  		fallthrough
   363  	case chat1.ArchiveChatJobStatus_ERROR:
   364  		delete(r.runningJobs, jobID)
   365  	case chat1.ArchiveChatJobStatus_RUNNING:
   366  		if cancel != nil {
   367  			r.runningJobs[jobID] = cancel
   368  		}
   369  	}
   370  
   371  	r.jobHistory.JobHistory[jobID] = job
   372  	r.dirty = true
   373  	return nil
   374  }
   375  
   376  func (r *ChatArchiveRegistry) Pause(ctx context.Context, jobID chat1.ArchiveJobID) (err error) {
   377  	defer r.Trace(ctx, &err, "Pause(%v)", jobID)()
   378  	r.Lock()
   379  	defer r.Unlock()
   380  
   381  	err = r.initLocked(ctx)
   382  	if err != nil {
   383  		return err
   384  	}
   385  
   386  	job, ok := r.jobHistory.JobHistory[jobID]
   387  	if !ok {
   388  		return NewArchiveJobNotFoundError(jobID)
   389  	}
   390  
   391  	if job.Status != chat1.ArchiveChatJobStatus_RUNNING {
   392  		return fmt.Errorf("Cannot pause a non-running job. Found status %v", job.Status)
   393  	}
   394  
   395  	cancel, ok := r.runningJobs[jobID]
   396  	if !ok {
   397  		return NewArchiveJobNotFoundError(jobID)
   398  	}
   399  	if cancel == nil {
   400  		return fmt.Errorf("cancel unexpectedly nil")
   401  	}
   402  	delete(r.runningJobs, jobID)
   403  
   404  	job = cancel()
   405  	job.Status = chat1.ArchiveChatJobStatus_PAUSED
   406  	r.jobHistory.JobHistory[jobID] = job
   407  	r.dirty = true
   408  	return nil
   409  }
   410  
   411  func (r *ChatArchiveRegistry) Resume(ctx context.Context, jobID chat1.ArchiveJobID) (err error) {
   412  	defer r.Trace(ctx, &err, "Resume(%v)", jobID)()
   413  	r.Lock()
   414  	defer r.Unlock()
   415  
   416  	err = r.initLocked(ctx)
   417  	if err != nil {
   418  		return err
   419  	}
   420  
   421  	job, ok := r.jobHistory.JobHistory[jobID]
   422  	if !ok {
   423  		return NewArchiveJobNotFoundError(jobID)
   424  	}
   425  
   426  	switch job.Status {
   427  	case chat1.ArchiveChatJobStatus_ERROR:
   428  	case chat1.ArchiveChatJobStatus_PAUSED:
   429  	case chat1.ArchiveChatJobStatus_BACKGROUND_PAUSED:
   430  	default:
   431  		return fmt.Errorf("Cannot resume a non-paused job. Found status %v", job.Status)
   432  	}
   433  
   434  	// Resume the job in the background, the job will register itself as running
   435  	go func() {
   436  		_, err := NewChatArchiver(r.G(), r.uid, r.remoteClient).ArchiveChat(context.Background(), job.Request)
   437  		if err != nil {
   438  			r.Debug(ctx, err.Error())
   439  		}
   440  	}()
   441  	return nil
   442  }
   443  
// Compile-time check that *ChatArchiveRegistry implements
// types.ChatArchiveRegistry.
var _ types.ChatArchiveRegistry = (*ChatArchiveRegistry)(nil)
   445  
   446  const defaultPageSizeDesktop = 999
   447  const defaultPageSizeMobile = 300
   448  
// ChatArchiver fulfills an archive query: it renders the matching
// conversations to text files, downloads their attachments, and tracks
// progress across the conversations it archives in parallel.
type ChatArchiver struct {
	globals.Contextified
	utils.DebugLabeler
	uid gregor1.UID

	// Number of messages requested per page; set from the app type in
	// NewChatArchiver.
	pageSize int

	// The lock is taken around updates to the progress counters below and
	// around access to the job's checkpoints.
	sync.Mutex
	// Messages archived so far, capped at messagesTotal.
	messagesComplete int64
	// Total number of messages expected across all conversations.
	messagesTotal int64
	remoteClient  func() chat1.RemoteInterface
}
   462  
   463  func NewChatArchiver(g *globals.Context, uid gregor1.UID, remoteClient func() chat1.RemoteInterface) *ChatArchiver {
   464  	c := &ChatArchiver{
   465  		Contextified: globals.NewContextified(g),
   466  		DebugLabeler: utils.NewDebugLabeler(g.ExternalG(), "ChatArchiver", false),
   467  		uid:          uid,
   468  		remoteClient: remoteClient,
   469  	}
   470  	switch c.G().GetAppType() {
   471  	case libkb.MobileAppType:
   472  		c.pageSize = defaultPageSizeMobile
   473  	default:
   474  		c.pageSize = defaultPageSizeDesktop
   475  	}
   476  	return c
   477  }
   478  
   479  func (c *ChatArchiver) notifyProgress(ctx context.Context, jobID chat1.ArchiveJobID, pagination chat1.Pagination) {
   480  	c.Lock()
   481  	defer c.Unlock()
   482  	c.messagesComplete += int64(pagination.Num)
   483  	if c.messagesComplete > c.messagesTotal || pagination.Last {
   484  		// total messages is capped to the convs expunge, don't over report.
   485  		c.messagesComplete = c.messagesTotal
   486  	}
   487  	c.G().NotifyRouter.HandleChatArchiveProgress(ctx, jobID, c.messagesComplete, c.messagesTotal)
   488  }
   489  
   490  func (c *ChatArchiver) archiveName(conv chat1.ConversationLocal) string {
   491  	return chatrender.ConvName(c.G().GlobalContext, conv, c.G().GlobalContext.Env.GetUsername().String())
   492  }
   493  
   494  func (c *ChatArchiver) attachmentName(msg chat1.MessageUnboxedValid) string {
   495  	body := msg.MessageBody
   496  	typ, err := body.MessageType()
   497  	if err != nil {
   498  		return ""
   499  	}
   500  	if typ == chat1.MessageType_ATTACHMENT {
   501  		att := body.Attachment()
   502  		return fmt.Sprintf("%s (%d) - %s", gregor1.FromTime(msg.ServerHeader.Ctime).Format("2006-01-02 15.04.05"), msg.ServerHeader.MessageID, att.Object.Filename)
   503  	}
   504  	return ""
   505  }
   506  
   507  func (c *ChatArchiver) checkpointConv(ctx context.Context, f *os.File, cp chat1.ArchiveChatConvCheckpoint, convID chat1.ConversationID, job *chat1.ArchiveChatJob) (err error) {
   508  	// Flush and update the registry
   509  	err = f.Sync()
   510  	if err != nil {
   511  		return err
   512  	}
   513  	stat, err := f.Stat()
   514  	if err != nil {
   515  		return err
   516  	}
   517  	cp.Offset = stat.Size()
   518  
   519  	c.Lock()
   520  	// Mark our overall progress.
   521  	job.MessagesTotal = c.messagesTotal
   522  	job.MessagesComplete = c.messagesComplete
   523  	// And this conv's individual progress.
   524  	job.Checkpoints[convID.DbShortFormString()] = cp
   525  	c.Unlock()
   526  
   527  	return c.G().ArchiveRegistry.Set(ctx, nil, *job)
   528  }
   529  
   530  func (c *ChatArchiver) archiveConv(ctx context.Context, job *chat1.ArchiveChatJob, conv chat1.ConversationLocal) error {
   531  	c.Lock()
   532  	cp, ok := job.Checkpoints[conv.Info.Id.DbShortFormString()]
   533  	c.Unlock()
   534  	if !ok {
   535  		cp = chat1.ArchiveChatConvCheckpoint{
   536  			Pagination: chat1.Pagination{Num: c.pageSize},
   537  			Offset:     0,
   538  		}
   539  	}
   540  
   541  	convArchivePath := path.Join(job.Request.OutputPath, c.archiveName(conv), "chat.txt")
   542  	f, err := os.OpenFile(convArchivePath, os.O_RDWR|os.O_CREATE, libkb.PermFile)
   543  	if err != nil {
   544  		return err
   545  	}
   546  	err = f.Truncate(cp.Offset)
   547  	if err != nil {
   548  		return err
   549  	}
   550  	_, err = f.Seek(cp.Offset, 0)
   551  	if err != nil {
   552  		return err
   553  	}
   554  	defer f.Close()
   555  
   556  	firstPage := cp.Offset == 0
   557  	for !cp.Pagination.Last {
   558  		thread, err := c.G().ConvSource.Pull(ctx, conv.Info.Id, c.uid,
   559  			chat1.GetThreadReason_ARCHIVE, nil,
   560  			&chat1.GetThreadQuery{
   561  				MarkAsRead: false,
   562  			}, &cp.Pagination)
   563  		if err != nil {
   564  			return err
   565  		}
   566  
   567  		msgs := thread.Messages
   568  
   569  		// reverse the thread in place so we render in descending order in the file.
   570  		for i, j := 0, len(msgs)-1; i < j; i, j = i+1, j-1 {
   571  			msgs[i], msgs[j] = msgs[j], msgs[i]
   572  		}
   573  
   574  		view := chatrender.ConversationView{
   575  			Conversation: conv,
   576  			Messages:     msgs,
   577  			Opts: chatrender.RenderOptions{
   578  				UseDateTime: true,
   579  				// Only show the headline message once
   580  				SkipHeadline: !firstPage,
   581  			},
   582  		}
   583  
   584  		err = view.RenderToWriter(c.G().GlobalContext, f, 1024, false)
   585  		if err != nil {
   586  			return err
   587  		}
   588  
   589  		// Check for any attachment messages and download them alongside the chat.
   590  		var eg errgroup.Group
   591  		// Fetch attachments in parallel but limit the number since we
   592  		// also allow parallel conv fetching.
   593  		eg.SetLimit(5)
   594  		for _, m := range msgs {
   595  			if !m.IsValidFull() {
   596  				continue
   597  			}
   598  			msg := m.Valid()
   599  			body := msg.MessageBody
   600  			typ, err := body.MessageType()
   601  			if err != nil {
   602  				return err
   603  			}
   604  			if typ == chat1.MessageType_ATTACHMENT {
   605  				eg.Go(func() error {
   606  					attachmentPath := path.Join(job.Request.OutputPath, c.archiveName(conv), c.attachmentName(msg))
   607  					f, err := os.Create(attachmentPath)
   608  					if err != nil {
   609  						return err
   610  					}
   611  					defer f.Close()
   612  
   613  					err = attachments.Download(ctx, c.G(), c.uid, conv.Info.Id,
   614  						msg.ServerHeader.MessageID, f, false, func(_, _ int64) {}, c.remoteClient)
   615  					if err != nil {
   616  						return err
   617  					}
   618  					return nil
   619  				})
   620  			}
   621  		}
   622  		err = eg.Wait()
   623  		if err != nil {
   624  			return err
   625  		}
   626  
   627  		// update our progress percentage in the UI
   628  		c.notifyProgress(ctx, job.Request.JobID, *thread.Pagination)
   629  
   630  		// update our pagination so we can correctly fetch the next page and marking progress in our checkpoint.
   631  		firstPage = false
   632  		cp.Pagination = *thread.Pagination
   633  		cp.Pagination.Num = c.pageSize
   634  		cp.Pagination.Previous = nil
   635  		ierr := c.checkpointConv(ctx, f, cp, conv.Info.Id, job)
   636  		if ierr != nil {
   637  			c.Debug(ctx, ierr.Error())
   638  		}
   639  	}
   640  	return nil
   641  }
   642  
// ArchiveChat runs (or resumes) the archive job described by arg and returns
// the path of the produced archive: the output directory, or
// "<output directory>.tar.gzip" when arg.Compress is set.
//
// The conversations matching arg.Query are archived in parallel. The job is
// registered with the archive registry as RUNNING together with a pause
// function; on exit the job is recorded as COMPLETE or ERROR and the UI is
// notified, unless the registry paused the job, in which case the registry
// is left to record the final state.
func (c *ChatArchiver) ArchiveChat(ctx context.Context, arg chat1.ArchiveChatJobRequest) (outpath string, err error) {
	defer c.Trace(ctx, &err, "ArchiveChat")()

	// Default to a per-job directory inside the downloads dir.
	if len(arg.OutputPath) == 0 {
		arg.OutputPath = path.Join(c.G().GlobalContext.Env.GetDownloadsDir(), fmt.Sprintf("kbchat-%s", arg.JobID))
	}

	// Make sure the root output path exists
	err = os.MkdirAll(arg.OutputPath, os.ModePerm)
	if err != nil {
		return "", err
	}

	// Resolve query to a set of convIDs.
	iboxRes, _, err := c.G().InboxSource.Read(ctx, c.uid, types.ConversationLocalizerBlocking,
		types.InboxSourceDataSourceAll, nil, arg.Query)
	if err != nil {
		return "", err
	}
	convs := iboxRes.Convs

	// Fetch size of each conv to track progress.
	for _, conv := range convs {
		c.messagesTotal += int64(conv.MaxVisibleMsgID() - conv.GetMaxDeletedUpTo())

		// Each conversation gets its own directory under the output path.
		convArchivePath := path.Join(arg.OutputPath, c.archiveName(conv))
		err = os.MkdirAll(convArchivePath, os.ModePerm)
		if err != nil {
			return "", err
		}
	}

	// Reuse the registry's record (and its checkpoints) when the job already
	// exists; a not-found error means this is a new job.
	jobInfo, err := c.G().ArchiveRegistry.Get(ctx, arg.JobID)
	if err != nil {
		if _, ok := err.(ArchiveJobNotFoundError); !ok {
			return "", err
		}
		jobInfo = chat1.ArchiveChatJob{
			Request:     arg,
			StartedAt:   gregor1.ToTime(time.Now()),
			Checkpoints: make(map[string]chat1.ArchiveChatConvCheckpoint),
		}
	}
	// Presume to resume
	jobInfo.Status = chat1.ArchiveChatJobStatus_RUNNING
	jobInfo.Err = ""

	// Setup to run each conv in parallel
	eg, ctx := errgroup.WithContext(ctx)
	ctx, cancel := context.WithCancel(ctx)
	// Closed if we are canceled
	cancelCh := make(chan struct{})
	// Closed by the deferred cleanup below once this call is finishing.
	doneCh := make(chan struct{})
	// pause is the cancel function handed to the registry: it cancels the
	// job's context, marks the job as canceled, waits for the cleanup below
	// to run, and returns the job's latest state.
	pause := func() chat1.ArchiveChatJob {
		cancel()
		close(cancelCh)
		// Block until we cleanup
		<-doneCh
		c.Lock()
		defer c.Unlock()
		return jobInfo
	}

	// Mark ourselves as running
	err = c.G().ArchiveRegistry.Set(ctx, pause, jobInfo)
	if err != nil {
		return "", err
	}
	// And update our state when we exit
	defer func() {
		// Runs last, releasing a pause() that is blocked on doneCh.
		defer func() { close(doneCh) }()
		select {
		case <-cancelCh:
			c.Debug(ctx, "canceled by registry, short-circuiting.")
			// If we were canceled by the registry, abort.
			return
		default:
		}

		// Update the registry
		jobInfo.Status = chat1.ArchiveChatJobStatus_COMPLETE
		if err != nil {
			jobInfo.Status = chat1.ArchiveChatJobStatus_ERROR
			jobInfo.Err = err.Error()
		}
		ierr := c.G().ArchiveRegistry.Set(ctx, nil, jobInfo)
		if ierr != nil {
			c.Debug(ctx, ierr.Error())
		}

		// Alert the UI
		c.G().NotifyRouter.HandleChatArchiveComplete(ctx, arg.JobID)
	}()

	// For each conv, fetch batches of messages until all are fetched.
	//    - Messages are rendered in a text format and attachments are downloaded to the archive path.
	eg.SetLimit(10)
	for _, conv := range convs {
		// Per-iteration copy for the closure below.
		conv := conv
		eg.Go(func() error {
			return c.archiveConv(ctx, &jobInfo, conv)
		})
	}
	err = eg.Wait()
	if err != nil {
		return "", err
	}

	outpath = arg.OutputPath
	if arg.Compress {
		// Pack the output directory into a sibling archive, then remove the
		// directory itself.
		outpath += ".tar.gzip"
		err = tarGzip(arg.OutputPath, outpath)
		if err != nil {
			return "", err
		}
		err = os.RemoveAll(arg.OutputPath)
		if err != nil {
			return "", err
		}
	}

	return outpath, nil
}
   766  
   767  func tarGzip(inPath, outPath string) error {
   768  	f, err := os.Create(outPath)
   769  	if err != nil {
   770  		return err
   771  	}
   772  	defer f.Close()
   773  
   774  	zr := gzip.NewWriter(f)
   775  	defer zr.Close()
   776  	tw := tar.NewWriter(zr)
   777  	defer tw.Close()
   778  
   779  	err = filepath.Walk(inPath, func(fp string, fi os.FileInfo, err error) error {
   780  		if err != nil {
   781  			return err
   782  		}
   783  		header, err := tar.FileInfoHeader(fi, fp)
   784  		if err != nil {
   785  			return err
   786  		}
   787  		name, err := filepath.Rel(inPath, filepath.ToSlash(fp))
   788  		if err != nil {
   789  			return err
   790  		}
   791  		header.Name = name
   792  
   793  		if err := tw.WriteHeader(header); err != nil {
   794  			return err
   795  		}
   796  		if fi.IsDir() {
   797  			return nil
   798  		}
   799  		file, err := os.Open(fp)
   800  		if err != nil {
   801  			return err
   802  		}
   803  		defer file.Close()
   804  		if _, err := io.Copy(tw, file); err != nil {
   805  			return err
   806  		}
   807  		return nil
   808  	})
   809  	if err != nil {
   810  		return err
   811  	}
   812  	return nil
   813  }