github.com/keybase/client/go@v0.0.0-20240309051027-028f7c731f8b/chat/archive.go (about) 1 package chat 2 3 import ( 4 "archive/tar" 5 "compress/gzip" 6 "context" 7 "errors" 8 "fmt" 9 "io" 10 "os" 11 "path" 12 "path/filepath" 13 "sort" 14 "sync" 15 "time" 16 17 "github.com/keybase/client/go/chat/attachments" 18 "github.com/keybase/client/go/chat/globals" 19 "github.com/keybase/client/go/chat/storage" 20 "github.com/keybase/client/go/chat/types" 21 "github.com/keybase/client/go/chat/utils" 22 "github.com/keybase/client/go/chatrender" 23 "github.com/keybase/client/go/encrypteddb" 24 "github.com/keybase/client/go/libkb" 25 "github.com/keybase/client/go/protocol/chat1" 26 "github.com/keybase/client/go/protocol/gregor1" 27 "github.com/keybase/client/go/protocol/keybase1" 28 "github.com/keybase/clockwork" 29 "golang.org/x/sync/errgroup" 30 ) 31 32 type ChatArchiveRegistry struct { 33 globals.Contextified 34 utils.DebugLabeler 35 sync.Mutex 36 37 started bool 38 uid gregor1.UID 39 // Have we populated from disk? 40 inited bool 41 // Delay before we restart paused jobs on startup 42 resumeJobsDelay time.Duration 43 flushDelay time.Duration 44 stopCh chan struct{} 45 clock clockwork.Clock 46 eg errgroup.Group 47 // Changes to flush to disk? 48 dirty bool 49 remoteClient func() chat1.RemoteInterface 50 runningJobs map[chat1.ArchiveJobID]types.CancelArchiveFn 51 52 edb *encrypteddb.EncryptedDB 53 jobHistory chat1.ArchiveChatHistory 54 } 55 56 type ArchiveJobNotFoundError struct { 57 jobID chat1.ArchiveJobID 58 } 59 60 func (e ArchiveJobNotFoundError) Error() string { 61 return fmt.Sprintf("job not found: %s", e.jobID) 62 } 63 64 func NewArchiveJobNotFoundError(jobID chat1.ArchiveJobID) ArchiveJobNotFoundError { 65 return ArchiveJobNotFoundError{jobID: jobID} 66 } 67 68 var _ error = ArchiveJobNotFoundError{} 69 70 func NewChatArchiveRegistry(g *globals.Context, remoteClient func() chat1.RemoteInterface) *ChatArchiveRegistry { 71 keyFn := func(ctx context.Context) ([32]byte, error) { 72 return storage.GetSecretBoxKey(ctx, g.ExternalG()) 73 } 74 dbFn := func(g *libkb.GlobalContext) *libkb.JSONLocalDb { 75 return g.LocalChatDb 76 } 77 r := &ChatArchiveRegistry{ 78 Contextified: globals.NewContextified(g), 79 DebugLabeler: utils.NewDebugLabeler(g.ExternalG(), "ChatArchiveRegistry", false), 80 remoteClient: remoteClient, 81 clock: clockwork.NewRealClock(), 82 flushDelay: 15 * time.Second, 83 runningJobs: make(map[chat1.ArchiveJobID]types.CancelArchiveFn), 84 jobHistory: chat1.ArchiveChatHistory{JobHistory: make(map[chat1.ArchiveJobID]chat1.ArchiveChatJob)}, 85 edb: encrypteddb.New(g.ExternalG(), dbFn, keyFn), 86 } 87 switch r.G().GetAppType() { 88 case libkb.MobileAppType: 89 r.resumeJobsDelay = 30 * time.Second 90 default: 91 r.resumeJobsDelay = 30 * time.Second 92 } 93 return r 94 } 95 96 func (r *ChatArchiveRegistry) dbKey() libkb.DbKey { 97 version := 0 98 key := fmt.Sprintf("ar:%d:%s", version, r.uid) 99 return libkb.DbKey{ 100 Typ: libkb.DBChatArchiveRegistry, 101 Key: key, 102 } 103 } 104 105 func (r *ChatArchiveRegistry) initLocked(ctx context.Context) error { 106 if !r.started { 107 return errors.New("not started") 108 } 109 if r.inited { 110 return nil 111 } 112 found, err := r.edb.Get(ctx, r.dbKey(), &r.jobHistory) 113 if err != nil { 114 return err 115 } 116 if !found { 117 r.jobHistory = chat1.ArchiveChatHistory{JobHistory: make(map[chat1.ArchiveJobID]chat1.ArchiveChatJob)} 118 } 119 r.inited = true 120 return nil 121 } 122 123 func (r *ChatArchiveRegistry) flushLocked(ctx context.Context) error { 124 if r.dirty { 125 err := r.edb.Put(ctx, r.dbKey(), r.jobHistory) 126 if err != nil { 127 return err 128 } 129 r.dirty = false 130 } 131 return nil 132 } 133 134 func (r *ChatArchiveRegistry) flushLoop(stopCh chan struct{}) error { 135 ctx := context.Background() 136 r.Debug(ctx, "flushLoop: starting") 137 for { 138 select { 139 case <-stopCh: 140 r.Debug(ctx, "flushLoop: shutting down") 141 return nil 142 case <-r.clock.After(r.flushDelay): 143 func() { 144 r.Lock() 145 defer r.Unlock() 146 err := r.flushLocked(ctx) 147 if err != nil { 148 r.Debug(ctx, "flushLoop: failed to flush: %s", err) 149 } 150 }() 151 } 152 } 153 } 154 155 func (r *ChatArchiveRegistry) resumeAllBgJobs(ctx context.Context) error { 156 select { 157 case <-r.stopCh: 158 return nil 159 case <-ctx.Done(): 160 return ctx.Err() 161 case <-time.After(r.resumeJobsDelay): 162 } 163 r.Lock() 164 defer r.Unlock() 165 err := r.initLocked(ctx) 166 if err != nil { 167 return err 168 } 169 for _, job := range r.jobHistory.JobHistory { 170 if job.Status == chat1.ArchiveChatJobStatus_BACKGROUND_PAUSED { 171 go func(job chat1.ArchiveChatJob) { 172 ctx := globals.ChatCtx(context.Background(), r.G(), keybase1.TLFIdentifyBehavior_CHAT_GUI, nil, NewSimpleIdentifyNotifier(r.G())) 173 _, err := NewChatArchiver(r.G(), r.uid, r.remoteClient).ArchiveChat(ctx, job.Request) 174 if err != nil { 175 r.Debug(ctx, err.Error()) 176 } 177 }(job) 178 } 179 } 180 return nil 181 } 182 183 func (r *ChatArchiveRegistry) monitorAppState() error { 184 appState := keybase1.MobileAppState_FOREGROUND 185 ctx, cancel := context.WithCancel(context.Background()) 186 for { 187 select { 188 case <-r.stopCh: 189 cancel() 190 return nil 191 case appState = <-r.G().MobileAppState.NextUpdate(&appState): 192 switch appState { 193 case keybase1.MobileAppState_FOREGROUND: 194 go func() { 195 ierr := r.resumeAllBgJobs(ctx) 196 if ierr != nil { 197 r.Debug(ctx, ierr.Error()) 198 } 199 }() 200 default: 201 cancel() 202 ctx, cancel = context.WithCancel(context.Background()) 203 r.bgPauseAllJobsLocked(ctx) 204 } 205 } 206 } 207 } 208 209 // Resumes previously BACKGROUND_PAUSED jobs, after a delay. 210 func (r *ChatArchiveRegistry) Start(ctx context.Context, uid gregor1.UID) { 211 defer r.Trace(ctx, nil, "Start")() 212 r.Lock() 213 defer r.Unlock() 214 if r.started { 215 return 216 } 217 r.uid = uid 218 r.started = true 219 r.stopCh = make(chan struct{}) 220 r.eg.Go(func() error { 221 return r.flushLoop(r.stopCh) 222 }) 223 r.eg.Go(func() error { 224 return r.resumeAllBgJobs(context.Background()) 225 }) 226 r.eg.Go(r.monitorAppState) 227 } 228 229 func (r *ChatArchiveRegistry) bgPauseAllJobsLocked(ctx context.Context) { 230 for jobID, cancel := range r.runningJobs { 231 job := cancel() 232 job.Status = chat1.ArchiveChatJobStatus_BACKGROUND_PAUSED 233 r.jobHistory.JobHistory[jobID] = job 234 } 235 r.runningJobs = make(map[chat1.ArchiveJobID]func() chat1.ArchiveChatJob) 236 237 r.dirty = true 238 _ = r.flushLocked(ctx) 239 } 240 241 // Pause running jobs marking as BACKGROUND_PAUSED 242 func (r *ChatArchiveRegistry) Stop(ctx context.Context) chan struct{} { 243 defer r.Trace(ctx, nil, "Stop")() 244 r.Lock() 245 defer r.Unlock() 246 ch := make(chan struct{}) 247 if r.started { 248 r.started = false 249 r.bgPauseAllJobsLocked(ctx) 250 close(r.stopCh) 251 go func() { 252 r.Debug(context.Background(), "Stop: waiting for shutdown") 253 _ = r.eg.Wait() 254 r.Debug(context.Background(), "Stop: shutdown complete") 255 close(ch) 256 }() 257 } else { 258 close(ch) 259 } 260 return ch 261 262 } 263 264 func (r *ChatArchiveRegistry) OnDbNuke(mctx libkb.MetaContext) (err error) { 265 defer r.Trace(mctx.Ctx(), &err, "ChatArchiveRegistry.OnDbNuke")() 266 r.Lock() 267 defer r.Unlock() 268 if !r.started { 269 return nil 270 } 271 r.inited = false 272 return nil 273 } 274 275 type ByJobStartedAt []chat1.ArchiveChatJob 276 277 func (c ByJobStartedAt) Len() int { return len(c) } 278 func (c ByJobStartedAt) Swap(i, j int) { c[i], c[j] = c[j], c[i] } 279 func (c ByJobStartedAt) Less(i, j int) bool { 280 x := c[i] 281 y := c[j] 282 if x.StartedAt == y.StartedAt { 283 return x.Request.JobID < y.Request.JobID 284 } 285 return c[i].StartedAt.Before(c[j].StartedAt) 286 } 287 288 func (r *ChatArchiveRegistry) List(ctx context.Context) (res chat1.ArchiveChatListRes, err error) { 289 defer r.Trace(ctx, &err, "List")() 290 r.Lock() 291 defer r.Unlock() 292 err = r.initLocked(ctx) 293 if err != nil { 294 return res, err 295 } 296 297 for _, job := range r.jobHistory.JobHistory { 298 res.Jobs = append(res.Jobs, job) 299 } 300 sort.Sort(ByJobStartedAt(res.Jobs)) 301 return res, nil 302 } 303 304 func (r *ChatArchiveRegistry) Get(ctx context.Context, jobID chat1.ArchiveJobID) (res chat1.ArchiveChatJob, err error) { 305 defer r.Trace(ctx, &err, "Get(%s)", jobID)() 306 r.Lock() 307 defer r.Unlock() 308 err = r.initLocked(ctx) 309 if err != nil { 310 return res, err 311 } 312 313 job, ok := r.jobHistory.JobHistory[jobID] 314 if !ok { 315 return res, NewArchiveJobNotFoundError(jobID) 316 } 317 return job, nil 318 } 319 320 func (r *ChatArchiveRegistry) Delete(ctx context.Context, jobID chat1.ArchiveJobID, deleteOutputPath bool) (err error) { 321 defer r.Trace(ctx, &err, "Delete(%s)", jobID)() 322 r.Lock() 323 defer r.Unlock() 324 err = r.initLocked(ctx) 325 if err != nil { 326 return err 327 } 328 329 cancel, ok := r.runningJobs[jobID] 330 if ok { 331 // Ignore the job output since we're deleting it anyway 332 cancel() 333 delete(r.runningJobs, jobID) 334 } 335 job, ok := r.jobHistory.JobHistory[jobID] 336 if !ok { 337 return NewArchiveJobNotFoundError(jobID) 338 } 339 delete(r.jobHistory.JobHistory, jobID) 340 r.dirty = true 341 if deleteOutputPath { 342 err = os.RemoveAll(job.Request.OutputPath) 343 if err != nil { 344 return err 345 } 346 } 347 return nil 348 } 349 350 func (r *ChatArchiveRegistry) Set(ctx context.Context, cancel types.CancelArchiveFn, job chat1.ArchiveChatJob) (err error) { 351 defer r.Trace(ctx, &err, "Set(%+v)", job)() 352 r.Lock() 353 defer r.Unlock() 354 err = r.initLocked(ctx) 355 if err != nil { 356 return err 357 } 358 359 jobID := job.Request.JobID 360 switch job.Status { 361 case chat1.ArchiveChatJobStatus_COMPLETE: 362 fallthrough 363 case chat1.ArchiveChatJobStatus_ERROR: 364 delete(r.runningJobs, jobID) 365 case chat1.ArchiveChatJobStatus_RUNNING: 366 if cancel != nil { 367 r.runningJobs[jobID] = cancel 368 } 369 } 370 371 r.jobHistory.JobHistory[jobID] = job 372 r.dirty = true 373 return nil 374 } 375 376 func (r *ChatArchiveRegistry) Pause(ctx context.Context, jobID chat1.ArchiveJobID) (err error) { 377 defer r.Trace(ctx, &err, "Pause(%v)", jobID)() 378 r.Lock() 379 defer r.Unlock() 380 381 err = r.initLocked(ctx) 382 if err != nil { 383 return err 384 } 385 386 job, ok := r.jobHistory.JobHistory[jobID] 387 if !ok { 388 return NewArchiveJobNotFoundError(jobID) 389 } 390 391 if job.Status != chat1.ArchiveChatJobStatus_RUNNING { 392 return fmt.Errorf("Cannot pause a non-running job. Found status %v", job.Status) 393 } 394 395 cancel, ok := r.runningJobs[jobID] 396 if !ok { 397 return NewArchiveJobNotFoundError(jobID) 398 } 399 if cancel == nil { 400 return fmt.Errorf("cancel unexpectedly nil") 401 } 402 delete(r.runningJobs, jobID) 403 404 job = cancel() 405 job.Status = chat1.ArchiveChatJobStatus_PAUSED 406 r.jobHistory.JobHistory[jobID] = job 407 r.dirty = true 408 return nil 409 } 410 411 func (r *ChatArchiveRegistry) Resume(ctx context.Context, jobID chat1.ArchiveJobID) (err error) { 412 defer r.Trace(ctx, &err, "Resume(%v)", jobID)() 413 r.Lock() 414 defer r.Unlock() 415 416 err = r.initLocked(ctx) 417 if err != nil { 418 return err 419 } 420 421 job, ok := r.jobHistory.JobHistory[jobID] 422 if !ok { 423 return NewArchiveJobNotFoundError(jobID) 424 } 425 426 switch job.Status { 427 case chat1.ArchiveChatJobStatus_ERROR: 428 case chat1.ArchiveChatJobStatus_PAUSED: 429 case chat1.ArchiveChatJobStatus_BACKGROUND_PAUSED: 430 default: 431 return fmt.Errorf("Cannot resume a non-paused job. Found status %v", job.Status) 432 } 433 434 // Resume the job in the background, the job will register itself as running 435 go func() { 436 _, err := NewChatArchiver(r.G(), r.uid, r.remoteClient).ArchiveChat(context.Background(), job.Request) 437 if err != nil { 438 r.Debug(ctx, err.Error()) 439 } 440 }() 441 return nil 442 } 443 444 var _ types.ChatArchiveRegistry = (*ChatArchiveRegistry)(nil) 445 446 const defaultPageSizeDesktop = 999 447 const defaultPageSizeMobile = 300 448 449 // Fullfil an archive query 450 type ChatArchiver struct { 451 globals.Contextified 452 utils.DebugLabeler 453 uid gregor1.UID 454 455 pageSize int 456 457 sync.Mutex 458 messagesComplete int64 459 messagesTotal int64 460 remoteClient func() chat1.RemoteInterface 461 } 462 463 func NewChatArchiver(g *globals.Context, uid gregor1.UID, remoteClient func() chat1.RemoteInterface) *ChatArchiver { 464 c := &ChatArchiver{ 465 Contextified: globals.NewContextified(g), 466 DebugLabeler: utils.NewDebugLabeler(g.ExternalG(), "ChatArchiver", false), 467 uid: uid, 468 remoteClient: remoteClient, 469 } 470 switch c.G().GetAppType() { 471 case libkb.MobileAppType: 472 c.pageSize = defaultPageSizeMobile 473 default: 474 c.pageSize = defaultPageSizeDesktop 475 } 476 return c 477 } 478 479 func (c *ChatArchiver) notifyProgress(ctx context.Context, jobID chat1.ArchiveJobID, pagination chat1.Pagination) { 480 c.Lock() 481 defer c.Unlock() 482 c.messagesComplete += int64(pagination.Num) 483 if c.messagesComplete > c.messagesTotal || pagination.Last { 484 // total messages is capped to the convs expunge, don't over report. 485 c.messagesComplete = c.messagesTotal 486 } 487 c.G().NotifyRouter.HandleChatArchiveProgress(ctx, jobID, c.messagesComplete, c.messagesTotal) 488 } 489 490 func (c *ChatArchiver) archiveName(conv chat1.ConversationLocal) string { 491 return chatrender.ConvName(c.G().GlobalContext, conv, c.G().GlobalContext.Env.GetUsername().String()) 492 } 493 494 func (c *ChatArchiver) attachmentName(msg chat1.MessageUnboxedValid) string { 495 body := msg.MessageBody 496 typ, err := body.MessageType() 497 if err != nil { 498 return "" 499 } 500 if typ == chat1.MessageType_ATTACHMENT { 501 att := body.Attachment() 502 return fmt.Sprintf("%s (%d) - %s", gregor1.FromTime(msg.ServerHeader.Ctime).Format("2006-01-02 15.04.05"), msg.ServerHeader.MessageID, att.Object.Filename) 503 } 504 return "" 505 } 506 507 func (c *ChatArchiver) checkpointConv(ctx context.Context, f *os.File, cp chat1.ArchiveChatConvCheckpoint, convID chat1.ConversationID, job *chat1.ArchiveChatJob) (err error) { 508 // Flush and update the registry 509 err = f.Sync() 510 if err != nil { 511 return err 512 } 513 stat, err := f.Stat() 514 if err != nil { 515 return err 516 } 517 cp.Offset = stat.Size() 518 519 c.Lock() 520 // Mark our overall progress. 521 job.MessagesTotal = c.messagesTotal 522 job.MessagesComplete = c.messagesComplete 523 // And this conv's individual progress. 524 job.Checkpoints[convID.DbShortFormString()] = cp 525 c.Unlock() 526 527 return c.G().ArchiveRegistry.Set(ctx, nil, *job) 528 } 529 530 func (c *ChatArchiver) archiveConv(ctx context.Context, job *chat1.ArchiveChatJob, conv chat1.ConversationLocal) error { 531 c.Lock() 532 cp, ok := job.Checkpoints[conv.Info.Id.DbShortFormString()] 533 c.Unlock() 534 if !ok { 535 cp = chat1.ArchiveChatConvCheckpoint{ 536 Pagination: chat1.Pagination{Num: c.pageSize}, 537 Offset: 0, 538 } 539 } 540 541 convArchivePath := path.Join(job.Request.OutputPath, c.archiveName(conv), "chat.txt") 542 f, err := os.OpenFile(convArchivePath, os.O_RDWR|os.O_CREATE, libkb.PermFile) 543 if err != nil { 544 return err 545 } 546 err = f.Truncate(cp.Offset) 547 if err != nil { 548 return err 549 } 550 _, err = f.Seek(cp.Offset, 0) 551 if err != nil { 552 return err 553 } 554 defer f.Close() 555 556 firstPage := cp.Offset == 0 557 for !cp.Pagination.Last { 558 thread, err := c.G().ConvSource.Pull(ctx, conv.Info.Id, c.uid, 559 chat1.GetThreadReason_ARCHIVE, nil, 560 &chat1.GetThreadQuery{ 561 MarkAsRead: false, 562 }, &cp.Pagination) 563 if err != nil { 564 return err 565 } 566 567 msgs := thread.Messages 568 569 // reverse the thread in place so we render in descending order in the file. 570 for i, j := 0, len(msgs)-1; i < j; i, j = i+1, j-1 { 571 msgs[i], msgs[j] = msgs[j], msgs[i] 572 } 573 574 view := chatrender.ConversationView{ 575 Conversation: conv, 576 Messages: msgs, 577 Opts: chatrender.RenderOptions{ 578 UseDateTime: true, 579 // Only show the headline message once 580 SkipHeadline: !firstPage, 581 }, 582 } 583 584 err = view.RenderToWriter(c.G().GlobalContext, f, 1024, false) 585 if err != nil { 586 return err 587 } 588 589 // Check for any attachment messages and download them alongside the chat. 590 var eg errgroup.Group 591 // Fetch attachments in parallel but limit the number since we 592 // also allow parallel conv fetching. 593 eg.SetLimit(5) 594 for _, m := range msgs { 595 if !m.IsValidFull() { 596 continue 597 } 598 msg := m.Valid() 599 body := msg.MessageBody 600 typ, err := body.MessageType() 601 if err != nil { 602 return err 603 } 604 if typ == chat1.MessageType_ATTACHMENT { 605 eg.Go(func() error { 606 attachmentPath := path.Join(job.Request.OutputPath, c.archiveName(conv), c.attachmentName(msg)) 607 f, err := os.Create(attachmentPath) 608 if err != nil { 609 return err 610 } 611 defer f.Close() 612 613 err = attachments.Download(ctx, c.G(), c.uid, conv.Info.Id, 614 msg.ServerHeader.MessageID, f, false, func(_, _ int64) {}, c.remoteClient) 615 if err != nil { 616 return err 617 } 618 return nil 619 }) 620 } 621 } 622 err = eg.Wait() 623 if err != nil { 624 return err 625 } 626 627 // update our progress percentage in the UI 628 c.notifyProgress(ctx, job.Request.JobID, *thread.Pagination) 629 630 // update our pagination so we can correctly fetch the next page and marking progress in our checkpoint. 631 firstPage = false 632 cp.Pagination = *thread.Pagination 633 cp.Pagination.Num = c.pageSize 634 cp.Pagination.Previous = nil 635 ierr := c.checkpointConv(ctx, f, cp, conv.Info.Id, job) 636 if ierr != nil { 637 c.Debug(ctx, ierr.Error()) 638 } 639 } 640 return nil 641 } 642 643 func (c *ChatArchiver) ArchiveChat(ctx context.Context, arg chat1.ArchiveChatJobRequest) (outpath string, err error) { 644 defer c.Trace(ctx, &err, "ArchiveChat")() 645 646 if len(arg.OutputPath) == 0 { 647 arg.OutputPath = path.Join(c.G().GlobalContext.Env.GetDownloadsDir(), fmt.Sprintf("kbchat-%s", arg.JobID)) 648 } 649 650 // Make sure the root output path exists 651 err = os.MkdirAll(arg.OutputPath, os.ModePerm) 652 if err != nil { 653 return "", err 654 } 655 656 // Resolve query to a set of convIDs. 657 iboxRes, _, err := c.G().InboxSource.Read(ctx, c.uid, types.ConversationLocalizerBlocking, 658 types.InboxSourceDataSourceAll, nil, arg.Query) 659 if err != nil { 660 return "", err 661 } 662 convs := iboxRes.Convs 663 664 // Fetch size of each conv to track progress. 665 for _, conv := range convs { 666 c.messagesTotal += int64(conv.MaxVisibleMsgID() - conv.GetMaxDeletedUpTo()) 667 668 convArchivePath := path.Join(arg.OutputPath, c.archiveName(conv)) 669 err = os.MkdirAll(convArchivePath, os.ModePerm) 670 if err != nil { 671 return "", err 672 } 673 } 674 675 jobInfo, err := c.G().ArchiveRegistry.Get(ctx, arg.JobID) 676 if err != nil { 677 if _, ok := err.(ArchiveJobNotFoundError); !ok { 678 return "", err 679 } 680 jobInfo = chat1.ArchiveChatJob{ 681 Request: arg, 682 StartedAt: gregor1.ToTime(time.Now()), 683 Checkpoints: make(map[string]chat1.ArchiveChatConvCheckpoint), 684 } 685 } 686 // Presume to resume 687 jobInfo.Status = chat1.ArchiveChatJobStatus_RUNNING 688 jobInfo.Err = "" 689 690 // Setup to run each conv in parallel 691 eg, ctx := errgroup.WithContext(ctx) 692 ctx, cancel := context.WithCancel(ctx) 693 // Closed if we are canceled 694 cancelCh := make(chan struct{}) 695 doneCh := make(chan struct{}) 696 pause := func() chat1.ArchiveChatJob { 697 cancel() 698 close(cancelCh) 699 // Block until we cleanup 700 <-doneCh 701 c.Lock() 702 defer c.Unlock() 703 return jobInfo 704 } 705 706 // Mark ourselves as running 707 err = c.G().ArchiveRegistry.Set(ctx, pause, jobInfo) 708 if err != nil { 709 return "", err 710 } 711 // And update our state when we exit 712 defer func() { 713 defer func() { close(doneCh) }() 714 select { 715 case <-cancelCh: 716 c.Debug(ctx, "canceled by registry, short-circuiting.") 717 // If we were canceled by the registry, abort. 718 return 719 default: 720 } 721 722 // Update the registry 723 jobInfo.Status = chat1.ArchiveChatJobStatus_COMPLETE 724 if err != nil { 725 jobInfo.Status = chat1.ArchiveChatJobStatus_ERROR 726 jobInfo.Err = err.Error() 727 } 728 ierr := c.G().ArchiveRegistry.Set(ctx, nil, jobInfo) 729 if ierr != nil { 730 c.Debug(ctx, ierr.Error()) 731 } 732 733 // Alert the UI 734 c.G().NotifyRouter.HandleChatArchiveComplete(ctx, arg.JobID) 735 }() 736 737 // For each conv, fetch batches of messages until all are fetched. 738 // - Messages are rendered in a text format and attachments are downloaded to the archive path. 739 eg.SetLimit(10) 740 for _, conv := range convs { 741 conv := conv 742 eg.Go(func() error { 743 return c.archiveConv(ctx, &jobInfo, conv) 744 }) 745 } 746 err = eg.Wait() 747 if err != nil { 748 return "", err 749 } 750 751 outpath = arg.OutputPath 752 if arg.Compress { 753 outpath += ".tar.gzip" 754 err = tarGzip(arg.OutputPath, outpath) 755 if err != nil { 756 return "", err 757 } 758 err = os.RemoveAll(arg.OutputPath) 759 if err != nil { 760 return "", err 761 } 762 } 763 764 return outpath, nil 765 } 766 767 func tarGzip(inPath, outPath string) error { 768 f, err := os.Create(outPath) 769 if err != nil { 770 return err 771 } 772 defer f.Close() 773 774 zr := gzip.NewWriter(f) 775 defer zr.Close() 776 tw := tar.NewWriter(zr) 777 defer tw.Close() 778 779 err = filepath.Walk(inPath, func(fp string, fi os.FileInfo, err error) error { 780 if err != nil { 781 return err 782 } 783 header, err := tar.FileInfoHeader(fi, fp) 784 if err != nil { 785 return err 786 } 787 name, err := filepath.Rel(inPath, filepath.ToSlash(fp)) 788 if err != nil { 789 return err 790 } 791 header.Name = name 792 793 if err := tw.WriteHeader(header); err != nil { 794 return err 795 } 796 if fi.IsDir() { 797 return nil 798 } 799 file, err := os.Open(fp) 800 if err != nil { 801 return err 802 } 803 defer file.Close() 804 if _, err := io.Copy(tw, file); err != nil { 805 return err 806 } 807 return nil 808 }) 809 if err != nil { 810 return err 811 } 812 return nil 813 }