github.com/keybase/client/go@v0.0.0-20241007131713-f10651d043c8/chat/archive.go (about)

     1  package chat
     2  
     3  import (
     4  	"archive/tar"
     5  	"compress/gzip"
     6  	"context"
     7  	"errors"
     8  	"fmt"
     9  	"io"
    10  	"os"
    11  	"path"
    12  	"path/filepath"
    13  	"sort"
    14  	"sync"
    15  	"time"
    16  
    17  	"github.com/keybase/client/go/chat/attachments"
    18  	"github.com/keybase/client/go/chat/globals"
    19  	"github.com/keybase/client/go/chat/storage"
    20  	"github.com/keybase/client/go/chat/types"
    21  	"github.com/keybase/client/go/chat/utils"
    22  	"github.com/keybase/client/go/chatrender"
    23  	"github.com/keybase/client/go/encrypteddb"
    24  	"github.com/keybase/client/go/libkb"
    25  	"github.com/keybase/client/go/protocol/chat1"
    26  	"github.com/keybase/client/go/protocol/gregor1"
    27  	"github.com/keybase/client/go/protocol/keybase1"
    28  	"github.com/keybase/clockwork"
    29  	"golang.org/x/sync/errgroup"
    30  )
    31  
// ChatArchiveRegistry tracks chat archive jobs. It keeps the job history in
// memory, persists it through the encrypted DB handle, and remembers the pause
// callback of every job that registered itself as running. The embedded Mutex
// is taken by the registry's methods before they read or modify this state.
type ChatArchiveRegistry struct {
	globals.Contextified
	utils.DebugLabeler
	sync.Mutex

	// started is set by Start and cleared by Stop; uid is the user passed to
	// Start and is part of the DB key.
	started bool
	uid     gregor1.UID
	// Have we populated from disk?
	inited bool
	// Delay before we restart paused jobs on startup.
	// flushDelay is the period of the background flush loop, stopCh is closed
	// by Stop to end the background goroutines, and eg tracks those goroutines
	// so Stop can wait for them.
	resumeJobsDelay time.Duration
	flushDelay      time.Duration
	stopCh          chan struct{}
	clock           clockwork.Clock
	eg              errgroup.Group
	// Changes to flush to disk?
	// remoteClient is handed to the archivers created when resuming jobs;
	// runningJobs maps a running job to the callback that pauses it.
	dirty        bool
	remoteClient func() chat1.RemoteInterface
	runningJobs  map[chat1.ArchiveJobID]types.PauseArchiveFn

	// edb is the encrypted store backing jobHistory, the in-memory record of
	// all known jobs.
	edb        *encrypteddb.EncryptedDB
	jobHistory chat1.ArchiveChatHistory
}
    55  
    56  type ArchiveJobNotFoundError struct {
    57  	jobID chat1.ArchiveJobID
    58  }
    59  
    60  func (e ArchiveJobNotFoundError) Error() string {
    61  	return fmt.Sprintf("job not found: %s", e.jobID)
    62  }
    63  
    64  func NewArchiveJobNotFoundError(jobID chat1.ArchiveJobID) ArchiveJobNotFoundError {
    65  	return ArchiveJobNotFoundError{jobID: jobID}
    66  }
    67  
    68  var _ error = ArchiveJobNotFoundError{}
    69  
    70  func NewChatArchiveRegistry(g *globals.Context, remoteClient func() chat1.RemoteInterface) *ChatArchiveRegistry {
    71  	keyFn := func(ctx context.Context) ([32]byte, error) {
    72  		return storage.GetSecretBoxKey(ctx, g.ExternalG())
    73  	}
    74  	dbFn := func(g *libkb.GlobalContext) *libkb.JSONLocalDb {
    75  		return g.LocalChatDb
    76  	}
    77  	r := &ChatArchiveRegistry{
    78  		Contextified: globals.NewContextified(g),
    79  		DebugLabeler: utils.NewDebugLabeler(g.ExternalG(), "ChatArchiveRegistry", false),
    80  		remoteClient: remoteClient,
    81  		clock:        clockwork.NewRealClock(),
    82  		flushDelay:   15 * time.Second,
    83  		runningJobs:  make(map[chat1.ArchiveJobID]types.PauseArchiveFn),
    84  		jobHistory:   chat1.ArchiveChatHistory{JobHistory: make(map[chat1.ArchiveJobID]chat1.ArchiveChatJob)},
    85  		edb:          encrypteddb.New(g.ExternalG(), dbFn, keyFn),
    86  	}
    87  	switch r.G().GetAppType() {
    88  	case libkb.MobileAppType:
    89  		r.resumeJobsDelay = 30 * time.Second
    90  	default:
    91  		r.resumeJobsDelay = 30 * time.Second
    92  	}
    93  	return r
    94  }
    95  
    96  func (r *ChatArchiveRegistry) dbKey() libkb.DbKey {
    97  	version := 0
    98  	key := fmt.Sprintf("ar:%d:%s", version, r.uid)
    99  	return libkb.DbKey{
   100  		Typ: libkb.DBChatArchiveRegistry,
   101  		Key: key,
   102  	}
   103  }
   104  
   105  func (r *ChatArchiveRegistry) initLocked(ctx context.Context) error {
   106  	select {
   107  	case <-ctx.Done():
   108  		return ctx.Err()
   109  	default:
   110  	}
   111  	if !r.started {
   112  		return errors.New("not started")
   113  	}
   114  	if r.inited {
   115  		return nil
   116  	}
   117  	found, err := r.edb.Get(ctx, r.dbKey(), &r.jobHistory)
   118  	if err != nil {
   119  		return err
   120  	}
   121  	if !found {
   122  		r.jobHistory = chat1.ArchiveChatHistory{JobHistory: make(map[chat1.ArchiveJobID]chat1.ArchiveChatJob)}
   123  	}
   124  	r.inited = true
   125  	return nil
   126  }
   127  
   128  func (r *ChatArchiveRegistry) flushLocked(ctx context.Context) error {
   129  	if r.dirty {
   130  		err := r.edb.Put(ctx, r.dbKey(), r.jobHistory)
   131  		if err != nil {
   132  			return err
   133  		}
   134  		r.dirty = false
   135  	}
   136  	return nil
   137  }
   138  
   139  func (r *ChatArchiveRegistry) flushLoop(stopCh chan struct{}) error {
   140  	ctx := context.Background()
   141  	r.Debug(ctx, "flushLoop: starting")
   142  	for {
   143  		select {
   144  		case <-stopCh:
   145  			r.Debug(ctx, "flushLoop: shutting down")
   146  			return nil
   147  		case <-r.clock.After(r.flushDelay):
   148  			func() {
   149  				var err error
   150  				defer r.Trace(ctx, &err, "flushLoop")()
   151  				r.Lock()
   152  				defer r.Unlock()
   153  				err = r.flushLocked(ctx)
   154  				if err != nil {
   155  					r.Debug(ctx, "flushLoop: failed to flush: %s", err)
   156  				}
   157  			}()
   158  		}
   159  	}
   160  }
   161  
   162  func (r *ChatArchiveRegistry) resumeAllBgJobs(ctx context.Context) (err error) {
   163  	defer r.Trace(ctx, &err, "resumeAllBgJobs")()
   164  	select {
   165  	case <-r.stopCh:
   166  		return nil
   167  	case <-ctx.Done():
   168  		return ctx.Err()
   169  	case <-time.After(r.resumeJobsDelay):
   170  	}
   171  	r.Lock()
   172  	defer r.Unlock()
   173  	err = r.initLocked(ctx)
   174  	if err != nil {
   175  		return err
   176  	}
   177  	for _, job := range r.jobHistory.JobHistory {
   178  		if job.Status == chat1.ArchiveChatJobStatus_BACKGROUND_PAUSED {
   179  			go func(job chat1.ArchiveChatJob) {
   180  				ctx := globals.ChatCtx(context.Background(), r.G(), keybase1.TLFIdentifyBehavior_CHAT_GUI, nil, NewSimpleIdentifyNotifier(r.G()))
   181  				_, err := NewChatArchiver(r.G(), r.uid, r.remoteClient).ArchiveChat(ctx, job.Request)
   182  				if err != nil {
   183  					r.Debug(ctx, err.Error())
   184  				}
   185  			}(job)
   186  		}
   187  	}
   188  	return nil
   189  }
   190  
// monitorAppState reacts to mobile app state changes until the registry is
// stopped. On a transition to FOREGROUND it schedules a (delayed) resume of
// background-paused jobs; on any other state it cancels a pending resume and
// pauses every running job, marking it BACKGROUND_PAUSED. It always returns
// nil.
func (r *ChatArchiveRegistry) monitorAppState() error {
	appState := keybase1.MobileAppState_FOREGROUND
	// ctx scopes the pending resume started on FOREGROUND; it is replaced each
	// time the app leaves the foreground.
	ctx, cancel := context.WithCancel(context.Background())
	for {
		select {
		case <-r.stopCh:
			cancel()
			return nil
		case appState = <-r.G().MobileAppState.NextUpdate(&appState):
			r.Debug(ctx, "monitorAppState: next state -> %v", appState)
			switch appState {
			case keybase1.MobileAppState_FOREGROUND:
				// Run in the background so the delay inside resumeAllBgJobs
				// does not block handling of the next state change.
				go func() {
					ierr := r.resumeAllBgJobs(ctx)
					if ierr != nil {
						r.Debug(ctx, ierr.Error())
					}
				}()
			default:
				// Abort any resume that is still waiting out its delay, then
				// start a fresh context for the next foreground transition.
				cancel()
				ctx, cancel = context.WithCancel(context.Background())

				func() {
					// err is only reported through the trace.
					var err error
					defer r.Trace(ctx, &err, "monitorAppState")()
					r.Lock()
					defer r.Unlock()
					err = r.bgPauseAllJobsLocked(ctx)
				}()
			}
		}
	}
}
   224  
   225  // Resumes previously BACKGROUND_PAUSED jobs, after a delay.
   226  func (r *ChatArchiveRegistry) Start(ctx context.Context, uid gregor1.UID) {
   227  	defer r.Trace(ctx, nil, "Start")()
   228  	r.Lock()
   229  	defer r.Unlock()
   230  	if r.started {
   231  		return
   232  	}
   233  	r.uid = uid
   234  	r.started = true
   235  	r.stopCh = make(chan struct{})
   236  	r.eg.Go(func() error {
   237  		return r.flushLoop(r.stopCh)
   238  	})
   239  	r.eg.Go(func() error {
   240  		return r.resumeAllBgJobs(context.Background())
   241  	})
   242  	r.eg.Go(r.monitorAppState)
   243  }
   244  
   245  func (r *ChatArchiveRegistry) bgPauseAllJobsLocked(ctx context.Context) (err error) {
   246  	defer r.Trace(ctx, &err, "bgPauseAllJobsLocked")()
   247  	err = r.initLocked(ctx)
   248  	if err != nil {
   249  		return err
   250  	}
   251  
   252  	for jobID, pause := range r.runningJobs {
   253  		if pause == nil {
   254  			continue
   255  		}
   256  		pause()
   257  		job, ok := r.jobHistory.JobHistory[jobID]
   258  		if !ok {
   259  			continue
   260  		}
   261  		job.Status = chat1.ArchiveChatJobStatus_BACKGROUND_PAUSED
   262  		r.jobHistory.JobHistory[jobID] = job
   263  	}
   264  	r.runningJobs = make(map[chat1.ArchiveJobID]types.PauseArchiveFn)
   265  
   266  	r.dirty = true
   267  	err = r.flushLocked(ctx)
   268  	return err
   269  }
   270  
   271  // Pause running jobs marking as BACKGROUND_PAUSED
   272  func (r *ChatArchiveRegistry) Stop(ctx context.Context) chan struct{} {
   273  	defer r.Trace(ctx, nil, "Stop")()
   274  	r.Lock()
   275  	defer r.Unlock()
   276  	ch := make(chan struct{})
   277  	if r.started {
   278  		err := r.bgPauseAllJobsLocked(ctx)
   279  		if err != nil {
   280  			r.Debug(ctx, err.Error())
   281  		}
   282  		r.started = false
   283  		close(r.stopCh)
   284  		go func() {
   285  			r.Debug(context.Background(), "Stop: waiting for shutdown")
   286  			_ = r.eg.Wait()
   287  			r.Debug(context.Background(), "Stop: shutdown complete")
   288  			close(ch)
   289  		}()
   290  	} else {
   291  		close(ch)
   292  	}
   293  	return ch
   294  
   295  }
   296  
   297  func (r *ChatArchiveRegistry) OnDbNuke(mctx libkb.MetaContext) (err error) {
   298  	defer r.Trace(mctx.Ctx(), &err, "ChatArchiveRegistry.OnDbNuke")()
   299  	r.Lock()
   300  	defer r.Unlock()
   301  	if !r.started {
   302  		return nil
   303  	}
   304  	r.inited = false
   305  	return nil
   306  }
   307  
   308  type ByJobStartedAt []chat1.ArchiveChatJob
   309  
   310  func (c ByJobStartedAt) Len() int      { return len(c) }
   311  func (c ByJobStartedAt) Swap(i, j int) { c[i], c[j] = c[j], c[i] }
   312  func (c ByJobStartedAt) Less(i, j int) bool {
   313  	x := c[i]
   314  	y := c[j]
   315  	if x.StartedAt == y.StartedAt {
   316  		return x.Request.JobID < y.Request.JobID
   317  	}
   318  	return c[i].StartedAt.Before(c[j].StartedAt)
   319  }
   320  
   321  func (r *ChatArchiveRegistry) List(ctx context.Context) (res chat1.ArchiveChatListRes, err error) {
   322  	defer r.Trace(ctx, &err, "List")()
   323  	r.Lock()
   324  	defer r.Unlock()
   325  	err = r.initLocked(ctx)
   326  	if err != nil {
   327  		return res, err
   328  	}
   329  
   330  	for _, job := range r.jobHistory.JobHistory {
   331  		res.Jobs = append(res.Jobs, job)
   332  	}
   333  	sort.Sort(ByJobStartedAt(res.Jobs))
   334  	return res, nil
   335  }
   336  
   337  func (r *ChatArchiveRegistry) Get(ctx context.Context, jobID chat1.ArchiveJobID) (res chat1.ArchiveChatJob, err error) {
   338  	defer r.Trace(ctx, &err, "Get(%v)", jobID)()
   339  	r.Lock()
   340  	defer r.Unlock()
   341  	err = r.initLocked(ctx)
   342  	if err != nil {
   343  		return res, err
   344  	}
   345  
   346  	job, ok := r.jobHistory.JobHistory[jobID]
   347  	if !ok {
   348  		return res, NewArchiveJobNotFoundError(jobID)
   349  	}
   350  	return job, nil
   351  }
   352  
   353  func (r *ChatArchiveRegistry) Delete(ctx context.Context, jobID chat1.ArchiveJobID, deleteOutputPath bool) (err error) {
   354  	defer r.Trace(ctx, &err, "Delete(%v)", jobID)()
   355  	r.Lock()
   356  	defer r.Unlock()
   357  	err = r.initLocked(ctx)
   358  	if err != nil {
   359  		return err
   360  	}
   361  
   362  	cancel, ok := r.runningJobs[jobID]
   363  	if ok {
   364  		// Ignore the job output since we're deleting it anyway
   365  		cancel()
   366  		delete(r.runningJobs, jobID)
   367  	}
   368  	job, ok := r.jobHistory.JobHistory[jobID]
   369  	if !ok {
   370  		return NewArchiveJobNotFoundError(jobID)
   371  	}
   372  	delete(r.jobHistory.JobHistory, jobID)
   373  	r.dirty = true
   374  	if deleteOutputPath {
   375  		go func() {
   376  			_ = os.RemoveAll(job.Request.OutputPath)
   377  		}()
   378  	}
   379  	return nil
   380  }
   381  
   382  func (r *ChatArchiveRegistry) Set(ctx context.Context, cancel types.PauseArchiveFn, job chat1.ArchiveChatJob) (err error) {
   383  	defer r.Trace(ctx, &err, "Set(%v) -> %v", job.Request.JobID, job.Status)()
   384  	r.Lock()
   385  	defer r.Unlock()
   386  	err = r.initLocked(ctx)
   387  	if err != nil {
   388  		return err
   389  	}
   390  
   391  	jobID := job.Request.JobID
   392  	switch job.Status {
   393  	case chat1.ArchiveChatJobStatus_COMPLETE, chat1.ArchiveChatJobStatus_ERROR:
   394  		delete(r.runningJobs, jobID)
   395  	case chat1.ArchiveChatJobStatus_RUNNING:
   396  		if cancel != nil {
   397  			r.runningJobs[jobID] = cancel
   398  		}
   399  	}
   400  
   401  	r.jobHistory.JobHistory[jobID] = job.DeepCopy()
   402  	r.dirty = true
   403  	return nil
   404  }
   405  
   406  func (r *ChatArchiveRegistry) Pause(ctx context.Context, jobID chat1.ArchiveJobID) (err error) {
   407  	defer r.Trace(ctx, &err, "Pause(%v)", jobID)()
   408  	r.Lock()
   409  	defer r.Unlock()
   410  
   411  	err = r.initLocked(ctx)
   412  	if err != nil {
   413  		return err
   414  	}
   415  
   416  	job, ok := r.jobHistory.JobHistory[jobID]
   417  	if !ok {
   418  		return NewArchiveJobNotFoundError(jobID)
   419  	}
   420  
   421  	if job.Status != chat1.ArchiveChatJobStatus_RUNNING {
   422  		return fmt.Errorf("Cannot pause a non-running job. Found status %v", job.Status)
   423  	}
   424  
   425  	pause, ok := r.runningJobs[jobID]
   426  	if !ok {
   427  		return NewArchiveJobNotFoundError(jobID)
   428  	}
   429  	if pause == nil {
   430  		return fmt.Errorf("pause unexpectedly nil")
   431  	}
   432  	delete(r.runningJobs, jobID)
   433  
   434  	pause()
   435  	job.Status = chat1.ArchiveChatJobStatus_PAUSED
   436  	r.jobHistory.JobHistory[jobID] = job
   437  	r.dirty = true
   438  	return nil
   439  }
   440  
   441  func (r *ChatArchiveRegistry) Resume(ctx context.Context, jobID chat1.ArchiveJobID) (err error) {
   442  	defer r.Trace(ctx, &err, "Resume(%v)", jobID)()
   443  	r.Lock()
   444  	defer r.Unlock()
   445  
   446  	err = r.initLocked(ctx)
   447  	if err != nil {
   448  		return err
   449  	}
   450  
   451  	job, ok := r.jobHistory.JobHistory[jobID]
   452  	if !ok {
   453  		return NewArchiveJobNotFoundError(jobID)
   454  	}
   455  
   456  	switch job.Status {
   457  	case chat1.ArchiveChatJobStatus_ERROR:
   458  	case chat1.ArchiveChatJobStatus_PAUSED:
   459  	case chat1.ArchiveChatJobStatus_BACKGROUND_PAUSED:
   460  	default:
   461  		return fmt.Errorf("Cannot resume a non-paused job. Found status %v", job.Status)
   462  	}
   463  
   464  	// Resume the job in the background, the job will register itself as running
   465  	go func() {
   466  		ctx := globals.ChatCtx(context.Background(), r.G(), keybase1.TLFIdentifyBehavior_CHAT_GUI, nil, NewSimpleIdentifyNotifier(r.G()))
   467  		_, err := NewChatArchiver(r.G(), r.uid, r.remoteClient).ArchiveChat(ctx, job.Request)
   468  		if err != nil {
   469  			r.Debug(ctx, err.Error())
   470  		}
   471  	}()
   472  	return nil
   473  }
   474  
// Compile-time check that *ChatArchiveRegistry implements
// types.ChatArchiveRegistry.
var _ types.ChatArchiveRegistry = (*ChatArchiveRegistry)(nil)
   476  
   477  const defaultPageSizeDesktop = 1000
   478  const defaultPageSizeMobile = 300
   479  
// ChatArchiver fulfills a single archive query: it renders the matching
// conversations to text files and downloads their attachments.
type ChatArchiver struct {
	globals.Contextified
	utils.DebugLabeler
	// User whose conversations are read.
	uid gregor1.UID

	// Number of messages requested per page when pulling a thread.
	pageSize int

	// The Mutex is held while the shared job record is read or updated by the
	// per-conversation workers.
	sync.Mutex
	remoteClient func() chat1.RemoteInterface
}
   491  
   492  func NewChatArchiver(g *globals.Context, uid gregor1.UID, remoteClient func() chat1.RemoteInterface) *ChatArchiver {
   493  	c := &ChatArchiver{
   494  		Contextified: globals.NewContextified(g),
   495  		DebugLabeler: utils.NewDebugLabeler(g.ExternalG(), "ChatArchiver", false),
   496  		uid:          uid,
   497  		remoteClient: remoteClient,
   498  	}
   499  	switch c.G().GetAppType() {
   500  	case libkb.MobileAppType:
   501  		c.pageSize = defaultPageSizeMobile
   502  	default:
   503  		c.pageSize = defaultPageSizeDesktop
   504  	}
   505  	return c
   506  }
   507  
   508  func (c *ChatArchiver) notifyProgress(ctx context.Context, jobID chat1.ArchiveJobID, msgsComplete, msgsTotal int64) {
   509  	c.Debug(ctx, "notifyProgress(%s) %d/%d", jobID, msgsComplete, msgsTotal)
   510  	c.G().NotifyRouter.HandleChatArchiveProgress(ctx, jobID, msgsComplete, msgsTotal)
   511  }
   512  
   513  func (c *ChatArchiver) archiveName(conv chat1.ConversationLocal) string {
   514  	return chatrender.ConvName(c.G().GlobalContext, conv, c.G().GlobalContext.Env.GetUsername().String())
   515  }
   516  
   517  func (c *ChatArchiver) attachmentName(msg chat1.MessageUnboxedValid) string {
   518  	body := msg.MessageBody
   519  	typ, err := body.MessageType()
   520  	if err != nil {
   521  		return ""
   522  	}
   523  	if typ == chat1.MessageType_ATTACHMENT {
   524  		att := body.Attachment()
   525  		return fmt.Sprintf("%s (%d) - %s", gregor1.FromTime(msg.ServerHeader.Ctime).Format("2006-01-02 15.04.05"), msg.ServerHeader.MessageID, att.Object.Filename)
   526  	}
   527  	return ""
   528  }
   529  
   530  func (c *ChatArchiver) checkpointConv(ctx context.Context, f *os.File, checkpoint chat1.ArchiveChatConvCheckpoint, convID chat1.ConversationID, job *chat1.ArchiveChatJob) (msgsComplete, msgsTotal int64, err error) {
   531  	// Flush and update the registry
   532  	err = f.Sync()
   533  	if err != nil {
   534  		return 0, 0, err
   535  	}
   536  	stat, err := f.Stat()
   537  	if err != nil {
   538  		return 0, 0, err
   539  	}
   540  	checkpoint.Offset = stat.Size()
   541  	c.Debug(ctx, "checkpointConv %+v", checkpoint)
   542  
   543  	c.Lock()
   544  	defer c.Unlock()
   545  	job.MessagesComplete += int64(checkpoint.Pagination.Num)
   546  	if job.MessagesComplete > job.MessagesTotal {
   547  		// total messages is capped to the convs expunge, don't over report.
   548  		job.MessagesComplete = job.MessagesTotal
   549  	}
   550  	// Add this conv's individual progress.
   551  	job.Checkpoints[convID.DbShortFormString()] = checkpoint
   552  
   553  	err = c.G().ArchiveRegistry.Set(ctx, nil, *job)
   554  	return job.MessagesComplete, job.MessagesTotal, err
   555  }
   556  
   557  func (c *ChatArchiver) archiveConv(ctx context.Context, jobReq chat1.ArchiveChatJobRequest, job *chat1.ArchiveChatJob, conv chat1.ConversationLocal) error {
   558  	c.Lock()
   559  	checkpoint, ok := job.Checkpoints[conv.Info.Id.DbShortFormString()]
   560  	c.Unlock()
   561  	if !ok {
   562  		checkpoint = chat1.ArchiveChatConvCheckpoint{}
   563  	} else {
   564  		c.Debug(ctx, "Resuming from checkpoint %+v", checkpoint)
   565  	}
   566  
   567  	convArchivePath := path.Join(job.Request.OutputPath, c.archiveName(conv), "chat.txt")
   568  	f, err := os.OpenFile(convArchivePath, os.O_RDWR|os.O_CREATE, libkb.PermFile)
   569  	if err != nil {
   570  		return err
   571  	}
   572  	err = f.Truncate(checkpoint.Offset)
   573  	if err != nil {
   574  		return err
   575  	}
   576  	_, err = f.Seek(checkpoint.Offset, 0)
   577  	if err != nil {
   578  		return err
   579  	}
   580  	defer f.Close()
   581  
   582  	firstPage := checkpoint.Offset == 0
   583  	for !checkpoint.Pagination.Last {
   584  		// Walk forward through the thread
   585  		checkpoint.Pagination.Num = c.pageSize
   586  		checkpoint.Pagination.Previous = nil
   587  		thread, err := c.G().ConvSource.Pull(ctx, conv.Info.Id, c.uid,
   588  			chat1.GetThreadReason_ARCHIVE, nil,
   589  			&chat1.GetThreadQuery{
   590  				MarkAsRead: false,
   591  			}, &checkpoint.Pagination)
   592  		if err != nil {
   593  			return err
   594  		}
   595  
   596  		msgs := thread.Messages
   597  		// reverse the thread in place so we render in descending order in the file.
   598  		for i, j := 0, len(msgs)-1; i < j; i, j = i+1, j-1 {
   599  			msgs[i], msgs[j] = msgs[j], msgs[i]
   600  		}
   601  
   602  		if len(msgs) == 0 {
   603  			continue
   604  		}
   605  
   606  		view := chatrender.ConversationView{
   607  			Conversation: conv,
   608  			Messages:     msgs,
   609  			Opts: chatrender.RenderOptions{
   610  				UseDateTime: true,
   611  				// Only show the headline message once
   612  				SkipHeadline: !firstPage,
   613  			},
   614  		}
   615  
   616  		err = view.RenderToWriter(c.G().GlobalContext, f, 1024, false)
   617  		if err != nil {
   618  			return err
   619  		}
   620  
   621  		// Check for any attachment messages and download them alongside the chat.
   622  		var eg errgroup.Group
   623  		// Fetch attachments in parallel but limit the number since we
   624  		// also allow parallel conv fetching.
   625  		eg.SetLimit(5)
   626  		for _, m := range msgs {
   627  			if !m.IsValidFull() {
   628  				continue
   629  			}
   630  			msg := m.Valid()
   631  			body := msg.MessageBody
   632  			typ, err := body.MessageType()
   633  			if err != nil {
   634  				return err
   635  			}
   636  			if typ == chat1.MessageType_ATTACHMENT {
   637  				eg.Go(func() error {
   638  					attachmentPath := path.Join(jobReq.OutputPath, c.archiveName(conv), c.attachmentName(msg))
   639  					f, err := os.Create(attachmentPath)
   640  					if err != nil {
   641  						return err
   642  					}
   643  					defer f.Close()
   644  
   645  					err = attachments.Download(ctx, c.G(), c.uid, conv.Info.Id,
   646  						msg.ServerHeader.MessageID, f, false, func(_, _ int64) {}, c.remoteClient)
   647  					if err != nil {
   648  						return err
   649  					}
   650  					return nil
   651  				})
   652  			}
   653  		}
   654  		err = eg.Wait()
   655  		if err != nil {
   656  			return err
   657  		}
   658  
   659  		// update our pagination so we can correctly fetch the next page
   660  		// and marking progress in our checkpoint.
   661  		firstPage = false
   662  		checkpoint.Pagination = *thread.Pagination
   663  		msgsComplete, msgsTotal, err := c.checkpointConv(ctx, f, checkpoint, conv.Info.Id, job)
   664  		if err != nil {
   665  			return err
   666  		}
   667  
   668  		// update our progress percentage in the UI
   669  		c.notifyProgress(ctx, jobReq.JobID, msgsComplete, msgsTotal)
   670  	}
   671  	return nil
   672  }
   673  
   674  func (c *ChatArchiver) ArchiveChat(ctx context.Context, arg chat1.ArchiveChatJobRequest) (outpath string, err error) {
   675  	defer c.Trace(ctx, &err, "ArchiveChat")()
   676  
   677  	if len(arg.OutputPath) == 0 {
   678  		switch c.G().GetAppType() {
   679  		case libkb.MobileAppType:
   680  			arg.OutputPath = path.Join(c.G().GlobalContext.Env.GetCacheDir(), fmt.Sprintf("kbchat-%s", arg.JobID))
   681  		default:
   682  			arg.OutputPath = path.Join(c.G().GlobalContext.Env.GetDownloadsDir(), fmt.Sprintf("kbchat-%s", arg.JobID))
   683  		}
   684  	}
   685  
   686  	jobInfo, err := c.G().ArchiveRegistry.Get(ctx, arg.JobID)
   687  	if err != nil {
   688  		if _, ok := err.(ArchiveJobNotFoundError); !ok {
   689  			return "", err
   690  		}
   691  		jobInfo = chat1.ArchiveChatJob{
   692  			Request:     arg,
   693  			StartedAt:   gregor1.ToTime(time.Now()),
   694  			Checkpoints: make(map[string]chat1.ArchiveChatConvCheckpoint),
   695  		}
   696  	}
   697  
   698  	// Setup to run each conv in parallel
   699  	eg, ctx := errgroup.WithContext(ctx)
   700  	ctx, cancelCtx := context.WithCancel(ctx)
   701  	// Make an explicit pause distinct from other ctx cancellation
   702  	pauseCh := make(chan struct{})
   703  	pause := func() {
   704  		defer c.Trace(ctx, nil, "ArchiveChat.pause")()
   705  		close(pauseCh)
   706  		cancelCtx()
   707  	}
   708  	// And update our state when we exit
   709  	defer func() {
   710  		defer c.Trace(ctx, &err, "ArchiveChat.cleanup")()
   711  		select {
   712  		case <-pauseCh:
   713  			c.Debug(ctx, "canceled by registry, short-circuiting.")
   714  			// If we were canceled by the registry, abort.
   715  			err = fmt.Errorf("Archive job paused")
   716  		default:
   717  			// Update the registry
   718  			jobInfo.Status = chat1.ArchiveChatJobStatus_COMPLETE
   719  			if err != nil {
   720  				jobInfo.Status = chat1.ArchiveChatJobStatus_ERROR
   721  				jobInfo.Err = err.Error()
   722  			}
   723  
   724  			// Write even if our context was canceled
   725  			ierr := c.G().ArchiveRegistry.Set(context.TODO(), nil, jobInfo)
   726  			if ierr != nil {
   727  				c.Debug(ctx, "ArchiveChat.cleanup %v", ierr)
   728  			}
   729  		}
   730  
   731  		// Alert the UI
   732  		c.G().NotifyRouter.HandleChatArchiveComplete(ctx, arg.JobID)
   733  	}()
   734  
   735  	// Presume to resume
   736  	jobInfo.Status = chat1.ArchiveChatJobStatus_RUNNING
   737  	jobInfo.Err = ""
   738  
   739  	// Update the store ASAP, we will update it again once we resolve the inbox query but that may take some time.
   740  	err = c.G().ArchiveRegistry.Set(ctx, pause, jobInfo)
   741  	if err != nil {
   742  		return "", err
   743  	}
   744  
   745  	c.notifyProgress(ctx, arg.JobID, jobInfo.MessagesComplete, jobInfo.MessagesTotal)
   746  
   747  	// Make sure the root output path exists
   748  	err = os.MkdirAll(arg.OutputPath, os.ModePerm)
   749  	if err != nil {
   750  		return "", err
   751  	}
   752  
   753  	// Resolve query to a set of convIDs.
   754  	iboxRes, _, err := c.G().InboxSource.Read(ctx, c.uid, types.ConversationLocalizerBlocking,
   755  		types.InboxSourceDataSourceAll, nil, arg.Query)
   756  	if err != nil {
   757  		return "", err
   758  	}
   759  	convs := iboxRes.Convs
   760  
   761  	// Fetch size of each conv to track progress.
   762  	var totalMsgs int64
   763  	for _, conv := range convs {
   764  		totalMsgs += int64(conv.MaxVisibleMsgID() - conv.GetMaxDeletedUpTo())
   765  
   766  		convArchivePath := path.Join(arg.OutputPath, c.archiveName(conv))
   767  		err = os.MkdirAll(convArchivePath, os.ModePerm)
   768  		if err != nil {
   769  			return "", err
   770  		}
   771  	}
   772  
   773  	jobInfo.MessagesTotal = totalMsgs
   774  	jobInfo.MatchingConvs = utils.PresentConversationLocals(ctx, c.G(), c.uid, convs, utils.PresentParticipantsModeSkip)
   775  	err = c.G().ArchiveRegistry.Set(ctx, nil, jobInfo)
   776  	if err != nil {
   777  		return "", err
   778  	}
   779  	c.notifyProgress(ctx, arg.JobID, jobInfo.MessagesComplete, jobInfo.MessagesTotal)
   780  
   781  	// For each conv, fetch batches of messages until all are fetched.
   782  	//    - Messages are rendered in a text format and attachments are downloaded to the archive path.
   783  	eg.SetLimit(10)
   784  	for _, conv := range convs {
   785  		conv := conv
   786  		eg.Go(func() error {
   787  			return c.archiveConv(ctx, arg, &jobInfo, conv)
   788  		})
   789  	}
   790  	err = eg.Wait()
   791  	if err != nil {
   792  		return "", err
   793  	}
   794  
   795  	outpath = arg.OutputPath
   796  	if arg.Compress {
   797  		outpath += ".tar.gzip"
   798  		err = tarGzip(arg.OutputPath, outpath)
   799  		if err != nil {
   800  			return "", err
   801  		}
   802  		err = os.RemoveAll(arg.OutputPath)
   803  		if err != nil {
   804  			return "", err
   805  		}
   806  	}
   807  
   808  	return outpath, nil
   809  }
   810  
   811  func tarGzip(inPath, outPath string) error {
   812  	f, err := os.Create(outPath)
   813  	if err != nil {
   814  		return err
   815  	}
   816  	defer f.Close()
   817  
   818  	zr := gzip.NewWriter(f)
   819  	defer zr.Close()
   820  	tw := tar.NewWriter(zr)
   821  	defer tw.Close()
   822  
   823  	err = filepath.Walk(inPath, func(fp string, fi os.FileInfo, err error) error {
   824  		if err != nil {
   825  			return err
   826  		}
   827  		header, err := tar.FileInfoHeader(fi, fp)
   828  		if err != nil {
   829  			return err
   830  		}
   831  		name, err := filepath.Rel(inPath, filepath.ToSlash(fp))
   832  		if err != nil {
   833  			return err
   834  		}
   835  		header.Name = name
   836  
   837  		if err := tw.WriteHeader(header); err != nil {
   838  			return err
   839  		}
   840  		if fi.IsDir() {
   841  			return nil
   842  		}
   843  		file, err := os.Open(fp)
   844  		if err != nil {
   845  			return err
   846  		}
   847  		defer file.Close()
   848  		if _, err := io.Copy(tw, file); err != nil {
   849  			return err
   850  		}
   851  		return nil
   852  	})
   853  	if err != nil {
   854  		return err
   855  	}
   856  	return nil
   857  }