github.com/keybase/client/go@v0.0.0-20241007131713-f10651d043c8/chat/archive.go (about) 1 package chat 2 3 import ( 4 "archive/tar" 5 "compress/gzip" 6 "context" 7 "errors" 8 "fmt" 9 "io" 10 "os" 11 "path" 12 "path/filepath" 13 "sort" 14 "sync" 15 "time" 16 17 "github.com/keybase/client/go/chat/attachments" 18 "github.com/keybase/client/go/chat/globals" 19 "github.com/keybase/client/go/chat/storage" 20 "github.com/keybase/client/go/chat/types" 21 "github.com/keybase/client/go/chat/utils" 22 "github.com/keybase/client/go/chatrender" 23 "github.com/keybase/client/go/encrypteddb" 24 "github.com/keybase/client/go/libkb" 25 "github.com/keybase/client/go/protocol/chat1" 26 "github.com/keybase/client/go/protocol/gregor1" 27 "github.com/keybase/client/go/protocol/keybase1" 28 "github.com/keybase/clockwork" 29 "golang.org/x/sync/errgroup" 30 ) 31 32 type ChatArchiveRegistry struct { 33 globals.Contextified 34 utils.DebugLabeler 35 sync.Mutex 36 37 started bool 38 uid gregor1.UID 39 // Have we populated from disk? 40 inited bool 41 // Delay before we restart paused jobs on startup 42 resumeJobsDelay time.Duration 43 flushDelay time.Duration 44 stopCh chan struct{} 45 clock clockwork.Clock 46 eg errgroup.Group 47 // Changes to flush to disk? 48 dirty bool 49 remoteClient func() chat1.RemoteInterface 50 runningJobs map[chat1.ArchiveJobID]types.PauseArchiveFn 51 52 edb *encrypteddb.EncryptedDB 53 jobHistory chat1.ArchiveChatHistory 54 } 55 56 type ArchiveJobNotFoundError struct { 57 jobID chat1.ArchiveJobID 58 } 59 60 func (e ArchiveJobNotFoundError) Error() string { 61 return fmt.Sprintf("job not found: %s", e.jobID) 62 } 63 64 func NewArchiveJobNotFoundError(jobID chat1.ArchiveJobID) ArchiveJobNotFoundError { 65 return ArchiveJobNotFoundError{jobID: jobID} 66 } 67 68 var _ error = ArchiveJobNotFoundError{} 69 70 func NewChatArchiveRegistry(g *globals.Context, remoteClient func() chat1.RemoteInterface) *ChatArchiveRegistry { 71 keyFn := func(ctx context.Context) ([32]byte, error) { 72 return storage.GetSecretBoxKey(ctx, g.ExternalG()) 73 } 74 dbFn := func(g *libkb.GlobalContext) *libkb.JSONLocalDb { 75 return g.LocalChatDb 76 } 77 r := &ChatArchiveRegistry{ 78 Contextified: globals.NewContextified(g), 79 DebugLabeler: utils.NewDebugLabeler(g.ExternalG(), "ChatArchiveRegistry", false), 80 remoteClient: remoteClient, 81 clock: clockwork.NewRealClock(), 82 flushDelay: 15 * time.Second, 83 runningJobs: make(map[chat1.ArchiveJobID]types.PauseArchiveFn), 84 jobHistory: chat1.ArchiveChatHistory{JobHistory: make(map[chat1.ArchiveJobID]chat1.ArchiveChatJob)}, 85 edb: encrypteddb.New(g.ExternalG(), dbFn, keyFn), 86 } 87 switch r.G().GetAppType() { 88 case libkb.MobileAppType: 89 r.resumeJobsDelay = 30 * time.Second 90 default: 91 r.resumeJobsDelay = 30 * time.Second 92 } 93 return r 94 } 95 96 func (r *ChatArchiveRegistry) dbKey() libkb.DbKey { 97 version := 0 98 key := fmt.Sprintf("ar:%d:%s", version, r.uid) 99 return libkb.DbKey{ 100 Typ: libkb.DBChatArchiveRegistry, 101 Key: key, 102 } 103 } 104 105 func (r *ChatArchiveRegistry) initLocked(ctx context.Context) error { 106 select { 107 case <-ctx.Done(): 108 return ctx.Err() 109 default: 110 } 111 if !r.started { 112 return errors.New("not started") 113 } 114 if r.inited { 115 return nil 116 } 117 found, err := r.edb.Get(ctx, r.dbKey(), &r.jobHistory) 118 if err != nil { 119 return err 120 } 121 if !found { 122 r.jobHistory = chat1.ArchiveChatHistory{JobHistory: make(map[chat1.ArchiveJobID]chat1.ArchiveChatJob)} 123 } 124 r.inited = true 125 return nil 126 } 127 128 func (r *ChatArchiveRegistry) flushLocked(ctx context.Context) error { 129 if r.dirty { 130 err := r.edb.Put(ctx, r.dbKey(), r.jobHistory) 131 if err != nil { 132 return err 133 } 134 r.dirty = false 135 } 136 return nil 137 } 138 139 func (r *ChatArchiveRegistry) flushLoop(stopCh chan struct{}) error { 140 ctx := context.Background() 141 r.Debug(ctx, "flushLoop: starting") 142 for { 143 select { 144 case <-stopCh: 145 r.Debug(ctx, "flushLoop: shutting down") 146 return nil 147 case <-r.clock.After(r.flushDelay): 148 func() { 149 var err error 150 defer r.Trace(ctx, &err, "flushLoop")() 151 r.Lock() 152 defer r.Unlock() 153 err = r.flushLocked(ctx) 154 if err != nil { 155 r.Debug(ctx, "flushLoop: failed to flush: %s", err) 156 } 157 }() 158 } 159 } 160 } 161 162 func (r *ChatArchiveRegistry) resumeAllBgJobs(ctx context.Context) (err error) { 163 defer r.Trace(ctx, &err, "resumeAllBgJobs")() 164 select { 165 case <-r.stopCh: 166 return nil 167 case <-ctx.Done(): 168 return ctx.Err() 169 case <-time.After(r.resumeJobsDelay): 170 } 171 r.Lock() 172 defer r.Unlock() 173 err = r.initLocked(ctx) 174 if err != nil { 175 return err 176 } 177 for _, job := range r.jobHistory.JobHistory { 178 if job.Status == chat1.ArchiveChatJobStatus_BACKGROUND_PAUSED { 179 go func(job chat1.ArchiveChatJob) { 180 ctx := globals.ChatCtx(context.Background(), r.G(), keybase1.TLFIdentifyBehavior_CHAT_GUI, nil, NewSimpleIdentifyNotifier(r.G())) 181 _, err := NewChatArchiver(r.G(), r.uid, r.remoteClient).ArchiveChat(ctx, job.Request) 182 if err != nil { 183 r.Debug(ctx, err.Error()) 184 } 185 }(job) 186 } 187 } 188 return nil 189 } 190 191 func (r *ChatArchiveRegistry) monitorAppState() error { 192 appState := keybase1.MobileAppState_FOREGROUND 193 ctx, cancel := context.WithCancel(context.Background()) 194 for { 195 select { 196 case <-r.stopCh: 197 cancel() 198 return nil 199 case appState = <-r.G().MobileAppState.NextUpdate(&appState): 200 r.Debug(ctx, "monitorAppState: next state -> %v", appState) 201 switch appState { 202 case keybase1.MobileAppState_FOREGROUND: 203 go func() { 204 ierr := r.resumeAllBgJobs(ctx) 205 if ierr != nil { 206 r.Debug(ctx, ierr.Error()) 207 } 208 }() 209 default: 210 cancel() 211 ctx, cancel = context.WithCancel(context.Background()) 212 213 func() { 214 var err error 215 defer r.Trace(ctx, &err, "monitorAppState")() 216 r.Lock() 217 defer r.Unlock() 218 err = r.bgPauseAllJobsLocked(ctx) 219 }() 220 } 221 } 222 } 223 } 224 225 // Resumes previously BACKGROUND_PAUSED jobs, after a delay. 226 func (r *ChatArchiveRegistry) Start(ctx context.Context, uid gregor1.UID) { 227 defer r.Trace(ctx, nil, "Start")() 228 r.Lock() 229 defer r.Unlock() 230 if r.started { 231 return 232 } 233 r.uid = uid 234 r.started = true 235 r.stopCh = make(chan struct{}) 236 r.eg.Go(func() error { 237 return r.flushLoop(r.stopCh) 238 }) 239 r.eg.Go(func() error { 240 return r.resumeAllBgJobs(context.Background()) 241 }) 242 r.eg.Go(r.monitorAppState) 243 } 244 245 func (r *ChatArchiveRegistry) bgPauseAllJobsLocked(ctx context.Context) (err error) { 246 defer r.Trace(ctx, &err, "bgPauseAllJobsLocked")() 247 err = r.initLocked(ctx) 248 if err != nil { 249 return err 250 } 251 252 for jobID, pause := range r.runningJobs { 253 if pause == nil { 254 continue 255 } 256 pause() 257 job, ok := r.jobHistory.JobHistory[jobID] 258 if !ok { 259 continue 260 } 261 job.Status = chat1.ArchiveChatJobStatus_BACKGROUND_PAUSED 262 r.jobHistory.JobHistory[jobID] = job 263 } 264 r.runningJobs = make(map[chat1.ArchiveJobID]types.PauseArchiveFn) 265 266 r.dirty = true 267 err = r.flushLocked(ctx) 268 return err 269 } 270 271 // Pause running jobs marking as BACKGROUND_PAUSED 272 func (r *ChatArchiveRegistry) Stop(ctx context.Context) chan struct{} { 273 defer r.Trace(ctx, nil, "Stop")() 274 r.Lock() 275 defer r.Unlock() 276 ch := make(chan struct{}) 277 if r.started { 278 err := r.bgPauseAllJobsLocked(ctx) 279 if err != nil { 280 r.Debug(ctx, err.Error()) 281 } 282 r.started = false 283 close(r.stopCh) 284 go func() { 285 r.Debug(context.Background(), "Stop: waiting for shutdown") 286 _ = r.eg.Wait() 287 r.Debug(context.Background(), "Stop: shutdown complete") 288 close(ch) 289 }() 290 } else { 291 close(ch) 292 } 293 return ch 294 295 } 296 297 func (r *ChatArchiveRegistry) OnDbNuke(mctx libkb.MetaContext) (err error) { 298 defer r.Trace(mctx.Ctx(), &err, "ChatArchiveRegistry.OnDbNuke")() 299 r.Lock() 300 defer r.Unlock() 301 if !r.started { 302 return nil 303 } 304 r.inited = false 305 return nil 306 } 307 308 type ByJobStartedAt []chat1.ArchiveChatJob 309 310 func (c ByJobStartedAt) Len() int { return len(c) } 311 func (c ByJobStartedAt) Swap(i, j int) { c[i], c[j] = c[j], c[i] } 312 func (c ByJobStartedAt) Less(i, j int) bool { 313 x := c[i] 314 y := c[j] 315 if x.StartedAt == y.StartedAt { 316 return x.Request.JobID < y.Request.JobID 317 } 318 return c[i].StartedAt.Before(c[j].StartedAt) 319 } 320 321 func (r *ChatArchiveRegistry) List(ctx context.Context) (res chat1.ArchiveChatListRes, err error) { 322 defer r.Trace(ctx, &err, "List")() 323 r.Lock() 324 defer r.Unlock() 325 err = r.initLocked(ctx) 326 if err != nil { 327 return res, err 328 } 329 330 for _, job := range r.jobHistory.JobHistory { 331 res.Jobs = append(res.Jobs, job) 332 } 333 sort.Sort(ByJobStartedAt(res.Jobs)) 334 return res, nil 335 } 336 337 func (r *ChatArchiveRegistry) Get(ctx context.Context, jobID chat1.ArchiveJobID) (res chat1.ArchiveChatJob, err error) { 338 defer r.Trace(ctx, &err, "Get(%v)", jobID)() 339 r.Lock() 340 defer r.Unlock() 341 err = r.initLocked(ctx) 342 if err != nil { 343 return res, err 344 } 345 346 job, ok := r.jobHistory.JobHistory[jobID] 347 if !ok { 348 return res, NewArchiveJobNotFoundError(jobID) 349 } 350 return job, nil 351 } 352 353 func (r *ChatArchiveRegistry) Delete(ctx context.Context, jobID chat1.ArchiveJobID, deleteOutputPath bool) (err error) { 354 defer r.Trace(ctx, &err, "Delete(%v)", jobID)() 355 r.Lock() 356 defer r.Unlock() 357 err = r.initLocked(ctx) 358 if err != nil { 359 return err 360 } 361 362 cancel, ok := r.runningJobs[jobID] 363 if ok { 364 // Ignore the job output since we're deleting it anyway 365 cancel() 366 delete(r.runningJobs, jobID) 367 } 368 job, ok := r.jobHistory.JobHistory[jobID] 369 if !ok { 370 return NewArchiveJobNotFoundError(jobID) 371 } 372 delete(r.jobHistory.JobHistory, jobID) 373 r.dirty = true 374 if deleteOutputPath { 375 go func() { 376 _ = os.RemoveAll(job.Request.OutputPath) 377 }() 378 } 379 return nil 380 } 381 382 func (r *ChatArchiveRegistry) Set(ctx context.Context, cancel types.PauseArchiveFn, job chat1.ArchiveChatJob) (err error) { 383 defer r.Trace(ctx, &err, "Set(%v) -> %v", job.Request.JobID, job.Status)() 384 r.Lock() 385 defer r.Unlock() 386 err = r.initLocked(ctx) 387 if err != nil { 388 return err 389 } 390 391 jobID := job.Request.JobID 392 switch job.Status { 393 case chat1.ArchiveChatJobStatus_COMPLETE, chat1.ArchiveChatJobStatus_ERROR: 394 delete(r.runningJobs, jobID) 395 case chat1.ArchiveChatJobStatus_RUNNING: 396 if cancel != nil { 397 r.runningJobs[jobID] = cancel 398 } 399 } 400 401 r.jobHistory.JobHistory[jobID] = job.DeepCopy() 402 r.dirty = true 403 return nil 404 } 405 406 func (r *ChatArchiveRegistry) Pause(ctx context.Context, jobID chat1.ArchiveJobID) (err error) { 407 defer r.Trace(ctx, &err, "Pause(%v)", jobID)() 408 r.Lock() 409 defer r.Unlock() 410 411 err = r.initLocked(ctx) 412 if err != nil { 413 return err 414 } 415 416 job, ok := r.jobHistory.JobHistory[jobID] 417 if !ok { 418 return NewArchiveJobNotFoundError(jobID) 419 } 420 421 if job.Status != chat1.ArchiveChatJobStatus_RUNNING { 422 return fmt.Errorf("Cannot pause a non-running job. Found status %v", job.Status) 423 } 424 425 pause, ok := r.runningJobs[jobID] 426 if !ok { 427 return NewArchiveJobNotFoundError(jobID) 428 } 429 if pause == nil { 430 return fmt.Errorf("pause unexpectedly nil") 431 } 432 delete(r.runningJobs, jobID) 433 434 pause() 435 job.Status = chat1.ArchiveChatJobStatus_PAUSED 436 r.jobHistory.JobHistory[jobID] = job 437 r.dirty = true 438 return nil 439 } 440 441 func (r *ChatArchiveRegistry) Resume(ctx context.Context, jobID chat1.ArchiveJobID) (err error) { 442 defer r.Trace(ctx, &err, "Resume(%v)", jobID)() 443 r.Lock() 444 defer r.Unlock() 445 446 err = r.initLocked(ctx) 447 if err != nil { 448 return err 449 } 450 451 job, ok := r.jobHistory.JobHistory[jobID] 452 if !ok { 453 return NewArchiveJobNotFoundError(jobID) 454 } 455 456 switch job.Status { 457 case chat1.ArchiveChatJobStatus_ERROR: 458 case chat1.ArchiveChatJobStatus_PAUSED: 459 case chat1.ArchiveChatJobStatus_BACKGROUND_PAUSED: 460 default: 461 return fmt.Errorf("Cannot resume a non-paused job. Found status %v", job.Status) 462 } 463 464 // Resume the job in the background, the job will register itself as running 465 go func() { 466 ctx := globals.ChatCtx(context.Background(), r.G(), keybase1.TLFIdentifyBehavior_CHAT_GUI, nil, NewSimpleIdentifyNotifier(r.G())) 467 _, err := NewChatArchiver(r.G(), r.uid, r.remoteClient).ArchiveChat(ctx, job.Request) 468 if err != nil { 469 r.Debug(ctx, err.Error()) 470 } 471 }() 472 return nil 473 } 474 475 var _ types.ChatArchiveRegistry = (*ChatArchiveRegistry)(nil) 476 477 const defaultPageSizeDesktop = 1000 478 const defaultPageSizeMobile = 300 479 480 // Fullfil an archive query 481 type ChatArchiver struct { 482 globals.Contextified 483 utils.DebugLabeler 484 uid gregor1.UID 485 486 pageSize int 487 488 sync.Mutex 489 remoteClient func() chat1.RemoteInterface 490 } 491 492 func NewChatArchiver(g *globals.Context, uid gregor1.UID, remoteClient func() chat1.RemoteInterface) *ChatArchiver { 493 c := &ChatArchiver{ 494 Contextified: globals.NewContextified(g), 495 DebugLabeler: utils.NewDebugLabeler(g.ExternalG(), "ChatArchiver", false), 496 uid: uid, 497 remoteClient: remoteClient, 498 } 499 switch c.G().GetAppType() { 500 case libkb.MobileAppType: 501 c.pageSize = defaultPageSizeMobile 502 default: 503 c.pageSize = defaultPageSizeDesktop 504 } 505 return c 506 } 507 508 func (c *ChatArchiver) notifyProgress(ctx context.Context, jobID chat1.ArchiveJobID, msgsComplete, msgsTotal int64) { 509 c.Debug(ctx, "notifyProgress(%s) %d/%d", jobID, msgsComplete, msgsTotal) 510 c.G().NotifyRouter.HandleChatArchiveProgress(ctx, jobID, msgsComplete, msgsTotal) 511 } 512 513 func (c *ChatArchiver) archiveName(conv chat1.ConversationLocal) string { 514 return chatrender.ConvName(c.G().GlobalContext, conv, c.G().GlobalContext.Env.GetUsername().String()) 515 } 516 517 func (c *ChatArchiver) attachmentName(msg chat1.MessageUnboxedValid) string { 518 body := msg.MessageBody 519 typ, err := body.MessageType() 520 if err != nil { 521 return "" 522 } 523 if typ == chat1.MessageType_ATTACHMENT { 524 att := body.Attachment() 525 return fmt.Sprintf("%s (%d) - %s", gregor1.FromTime(msg.ServerHeader.Ctime).Format("2006-01-02 15.04.05"), msg.ServerHeader.MessageID, att.Object.Filename) 526 } 527 return "" 528 } 529 530 func (c *ChatArchiver) checkpointConv(ctx context.Context, f *os.File, checkpoint chat1.ArchiveChatConvCheckpoint, convID chat1.ConversationID, job *chat1.ArchiveChatJob) (msgsComplete, msgsTotal int64, err error) { 531 // Flush and update the registry 532 err = f.Sync() 533 if err != nil { 534 return 0, 0, err 535 } 536 stat, err := f.Stat() 537 if err != nil { 538 return 0, 0, err 539 } 540 checkpoint.Offset = stat.Size() 541 c.Debug(ctx, "checkpointConv %+v", checkpoint) 542 543 c.Lock() 544 defer c.Unlock() 545 job.MessagesComplete += int64(checkpoint.Pagination.Num) 546 if job.MessagesComplete > job.MessagesTotal { 547 // total messages is capped to the convs expunge, don't over report. 548 job.MessagesComplete = job.MessagesTotal 549 } 550 // Add this conv's individual progress. 551 job.Checkpoints[convID.DbShortFormString()] = checkpoint 552 553 err = c.G().ArchiveRegistry.Set(ctx, nil, *job) 554 return job.MessagesComplete, job.MessagesTotal, err 555 } 556 557 func (c *ChatArchiver) archiveConv(ctx context.Context, jobReq chat1.ArchiveChatJobRequest, job *chat1.ArchiveChatJob, conv chat1.ConversationLocal) error { 558 c.Lock() 559 checkpoint, ok := job.Checkpoints[conv.Info.Id.DbShortFormString()] 560 c.Unlock() 561 if !ok { 562 checkpoint = chat1.ArchiveChatConvCheckpoint{} 563 } else { 564 c.Debug(ctx, "Resuming from checkpoint %+v", checkpoint) 565 } 566 567 convArchivePath := path.Join(job.Request.OutputPath, c.archiveName(conv), "chat.txt") 568 f, err := os.OpenFile(convArchivePath, os.O_RDWR|os.O_CREATE, libkb.PermFile) 569 if err != nil { 570 return err 571 } 572 err = f.Truncate(checkpoint.Offset) 573 if err != nil { 574 return err 575 } 576 _, err = f.Seek(checkpoint.Offset, 0) 577 if err != nil { 578 return err 579 } 580 defer f.Close() 581 582 firstPage := checkpoint.Offset == 0 583 for !checkpoint.Pagination.Last { 584 // Walk forward through the thread 585 checkpoint.Pagination.Num = c.pageSize 586 checkpoint.Pagination.Previous = nil 587 thread, err := c.G().ConvSource.Pull(ctx, conv.Info.Id, c.uid, 588 chat1.GetThreadReason_ARCHIVE, nil, 589 &chat1.GetThreadQuery{ 590 MarkAsRead: false, 591 }, &checkpoint.Pagination) 592 if err != nil { 593 return err 594 } 595 596 msgs := thread.Messages 597 // reverse the thread in place so we render in descending order in the file. 598 for i, j := 0, len(msgs)-1; i < j; i, j = i+1, j-1 { 599 msgs[i], msgs[j] = msgs[j], msgs[i] 600 } 601 602 if len(msgs) == 0 { 603 continue 604 } 605 606 view := chatrender.ConversationView{ 607 Conversation: conv, 608 Messages: msgs, 609 Opts: chatrender.RenderOptions{ 610 UseDateTime: true, 611 // Only show the headline message once 612 SkipHeadline: !firstPage, 613 }, 614 } 615 616 err = view.RenderToWriter(c.G().GlobalContext, f, 1024, false) 617 if err != nil { 618 return err 619 } 620 621 // Check for any attachment messages and download them alongside the chat. 622 var eg errgroup.Group 623 // Fetch attachments in parallel but limit the number since we 624 // also allow parallel conv fetching. 625 eg.SetLimit(5) 626 for _, m := range msgs { 627 if !m.IsValidFull() { 628 continue 629 } 630 msg := m.Valid() 631 body := msg.MessageBody 632 typ, err := body.MessageType() 633 if err != nil { 634 return err 635 } 636 if typ == chat1.MessageType_ATTACHMENT { 637 eg.Go(func() error { 638 attachmentPath := path.Join(jobReq.OutputPath, c.archiveName(conv), c.attachmentName(msg)) 639 f, err := os.Create(attachmentPath) 640 if err != nil { 641 return err 642 } 643 defer f.Close() 644 645 err = attachments.Download(ctx, c.G(), c.uid, conv.Info.Id, 646 msg.ServerHeader.MessageID, f, false, func(_, _ int64) {}, c.remoteClient) 647 if err != nil { 648 return err 649 } 650 return nil 651 }) 652 } 653 } 654 err = eg.Wait() 655 if err != nil { 656 return err 657 } 658 659 // update our pagination so we can correctly fetch the next page 660 // and marking progress in our checkpoint. 661 firstPage = false 662 checkpoint.Pagination = *thread.Pagination 663 msgsComplete, msgsTotal, err := c.checkpointConv(ctx, f, checkpoint, conv.Info.Id, job) 664 if err != nil { 665 return err 666 } 667 668 // update our progress percentage in the UI 669 c.notifyProgress(ctx, jobReq.JobID, msgsComplete, msgsTotal) 670 } 671 return nil 672 } 673 674 func (c *ChatArchiver) ArchiveChat(ctx context.Context, arg chat1.ArchiveChatJobRequest) (outpath string, err error) { 675 defer c.Trace(ctx, &err, "ArchiveChat")() 676 677 if len(arg.OutputPath) == 0 { 678 switch c.G().GetAppType() { 679 case libkb.MobileAppType: 680 arg.OutputPath = path.Join(c.G().GlobalContext.Env.GetCacheDir(), fmt.Sprintf("kbchat-%s", arg.JobID)) 681 default: 682 arg.OutputPath = path.Join(c.G().GlobalContext.Env.GetDownloadsDir(), fmt.Sprintf("kbchat-%s", arg.JobID)) 683 } 684 } 685 686 jobInfo, err := c.G().ArchiveRegistry.Get(ctx, arg.JobID) 687 if err != nil { 688 if _, ok := err.(ArchiveJobNotFoundError); !ok { 689 return "", err 690 } 691 jobInfo = chat1.ArchiveChatJob{ 692 Request: arg, 693 StartedAt: gregor1.ToTime(time.Now()), 694 Checkpoints: make(map[string]chat1.ArchiveChatConvCheckpoint), 695 } 696 } 697 698 // Setup to run each conv in parallel 699 eg, ctx := errgroup.WithContext(ctx) 700 ctx, cancelCtx := context.WithCancel(ctx) 701 // Make an explicit pause distinct from other ctx cancellation 702 pauseCh := make(chan struct{}) 703 pause := func() { 704 defer c.Trace(ctx, nil, "ArchiveChat.pause")() 705 close(pauseCh) 706 cancelCtx() 707 } 708 // And update our state when we exit 709 defer func() { 710 defer c.Trace(ctx, &err, "ArchiveChat.cleanup")() 711 select { 712 case <-pauseCh: 713 c.Debug(ctx, "canceled by registry, short-circuiting.") 714 // If we were canceled by the registry, abort. 715 err = fmt.Errorf("Archive job paused") 716 default: 717 // Update the registry 718 jobInfo.Status = chat1.ArchiveChatJobStatus_COMPLETE 719 if err != nil { 720 jobInfo.Status = chat1.ArchiveChatJobStatus_ERROR 721 jobInfo.Err = err.Error() 722 } 723 724 // Write even if our context was canceled 725 ierr := c.G().ArchiveRegistry.Set(context.TODO(), nil, jobInfo) 726 if ierr != nil { 727 c.Debug(ctx, "ArchiveChat.cleanup %v", ierr) 728 } 729 } 730 731 // Alert the UI 732 c.G().NotifyRouter.HandleChatArchiveComplete(ctx, arg.JobID) 733 }() 734 735 // Presume to resume 736 jobInfo.Status = chat1.ArchiveChatJobStatus_RUNNING 737 jobInfo.Err = "" 738 739 // Update the store ASAP, we will update it again once we resolve the inbox query but that may take some time. 740 err = c.G().ArchiveRegistry.Set(ctx, pause, jobInfo) 741 if err != nil { 742 return "", err 743 } 744 745 c.notifyProgress(ctx, arg.JobID, jobInfo.MessagesComplete, jobInfo.MessagesTotal) 746 747 // Make sure the root output path exists 748 err = os.MkdirAll(arg.OutputPath, os.ModePerm) 749 if err != nil { 750 return "", err 751 } 752 753 // Resolve query to a set of convIDs. 754 iboxRes, _, err := c.G().InboxSource.Read(ctx, c.uid, types.ConversationLocalizerBlocking, 755 types.InboxSourceDataSourceAll, nil, arg.Query) 756 if err != nil { 757 return "", err 758 } 759 convs := iboxRes.Convs 760 761 // Fetch size of each conv to track progress. 762 var totalMsgs int64 763 for _, conv := range convs { 764 totalMsgs += int64(conv.MaxVisibleMsgID() - conv.GetMaxDeletedUpTo()) 765 766 convArchivePath := path.Join(arg.OutputPath, c.archiveName(conv)) 767 err = os.MkdirAll(convArchivePath, os.ModePerm) 768 if err != nil { 769 return "", err 770 } 771 } 772 773 jobInfo.MessagesTotal = totalMsgs 774 jobInfo.MatchingConvs = utils.PresentConversationLocals(ctx, c.G(), c.uid, convs, utils.PresentParticipantsModeSkip) 775 err = c.G().ArchiveRegistry.Set(ctx, nil, jobInfo) 776 if err != nil { 777 return "", err 778 } 779 c.notifyProgress(ctx, arg.JobID, jobInfo.MessagesComplete, jobInfo.MessagesTotal) 780 781 // For each conv, fetch batches of messages until all are fetched. 782 // - Messages are rendered in a text format and attachments are downloaded to the archive path. 783 eg.SetLimit(10) 784 for _, conv := range convs { 785 conv := conv 786 eg.Go(func() error { 787 return c.archiveConv(ctx, arg, &jobInfo, conv) 788 }) 789 } 790 err = eg.Wait() 791 if err != nil { 792 return "", err 793 } 794 795 outpath = arg.OutputPath 796 if arg.Compress { 797 outpath += ".tar.gzip" 798 err = tarGzip(arg.OutputPath, outpath) 799 if err != nil { 800 return "", err 801 } 802 err = os.RemoveAll(arg.OutputPath) 803 if err != nil { 804 return "", err 805 } 806 } 807 808 return outpath, nil 809 } 810 811 func tarGzip(inPath, outPath string) error { 812 f, err := os.Create(outPath) 813 if err != nil { 814 return err 815 } 816 defer f.Close() 817 818 zr := gzip.NewWriter(f) 819 defer zr.Close() 820 tw := tar.NewWriter(zr) 821 defer tw.Close() 822 823 err = filepath.Walk(inPath, func(fp string, fi os.FileInfo, err error) error { 824 if err != nil { 825 return err 826 } 827 header, err := tar.FileInfoHeader(fi, fp) 828 if err != nil { 829 return err 830 } 831 name, err := filepath.Rel(inPath, filepath.ToSlash(fp)) 832 if err != nil { 833 return err 834 } 835 header.Name = name 836 837 if err := tw.WriteHeader(header); err != nil { 838 return err 839 } 840 if fi.IsDir() { 841 return nil 842 } 843 file, err := os.Open(fp) 844 if err != nil { 845 return err 846 } 847 defer file.Close() 848 if _, err := io.Copy(tw, file); err != nil { 849 return err 850 } 851 return nil 852 }) 853 if err != nil { 854 return err 855 } 856 return nil 857 }