github.com/cozy/cozy-stack@v0.0.0-20240603063001-31110fa4cae1/model/move/export.go

package move

import (
	"archive/tar"
	"archive/zip"
	"compress/gzip"
	"encoding/json"
	"errors"
	"io"
	"net/url"
	"path"
	"time"

	"github.com/cozy/cozy-stack/model/instance"
	"github.com/cozy/cozy-stack/model/job"
	"github.com/cozy/cozy-stack/model/note"
	"github.com/cozy/cozy-stack/model/vfs"
	"github.com/cozy/cozy-stack/pkg/consts"
	"github.com/cozy/cozy-stack/pkg/couchdb"
	"github.com/cozy/cozy-stack/pkg/mail"
	"github.com/cozy/cozy-stack/pkg/prefixer"
	"github.com/cozy/cozy-stack/pkg/realtime"
)

// ExportOptions contains the options for launching the export worker.
type ExportOptions struct {
	PartsSize        int64          `json:"parts_size"`
	MaxAge           time.Duration  `json:"max_age"`
	WithDoctypes     []string       `json:"with_doctypes,omitempty"`
	ContextualDomain string         `json:"contextual_domain,omitempty"`
	TokenSource      string         `json:"token_source,omitempty"`
	IgnoreVault      bool           `json:"ignore_vault,omitempty"`
	MoveTo           *MoveToOptions `json:"move_to,omitempty"`
	AdminReq         bool           `json:"admin_req,omitempty"`
}

// MoveToOptions is used when the export must be sent to another Cozy.
type MoveToOptions struct {
	URL          string `json:"url"`
	Token        string `json:"token"`
	ClientID     string `json:"client_id"`
	ClientSecret string `json:"client_secret"`
}

// ImportsURL returns the URL on the target for sending the download link to
// the export tarballs.
func (m *MoveToOptions) ImportsURL() string {
	u, err := url.Parse(m.URL)
	if err != nil {
		u, err = url.Parse("https://" + m.URL)
	}
	if err != nil {
		return m.URL
	}
	u.Path = "/move/imports"
	return u.String()
}

// minimalPartsSize is the minimal size of a file bucket, used to split the
// index into equal-sized parts.
const minimalPartsSize = 1024 * 1024 * 1024 // 1 GB

const (
	// ExportDataDir is the directory for storing the documents from CouchDB
	// in the export archive.
	ExportDataDir = "My Cozy/Data"
	// ExportFilesDir is the directory for storing the content of the files in
	// the export archive.
	ExportFilesDir = "My Cozy/Files"
	// ExportVersionsDir is the directory for storing the content of the old
	// versions of the files in the export archive.
	ExportVersionsDir = "My Cozy/Versions"
)
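
// exampleMoveOptions is a minimal, hypothetical sketch of how ExportOptions
// and MoveToOptions could be filled when moving an instance to another Cozy.
// The URL, token, and OAuth credentials are placeholders, and the PartsSize
// and MaxAge values are only illustrative, not the stack's defaults.
func exampleMoveOptions() ExportOptions {
	return ExportOptions{
		PartsSize: minimalPartsSize,    // split the files index into ~1 GB parts
		MaxAge:    7 * 24 * time.Hour,  // keep the export available for a week
		MoveTo: &MoveToOptions{
			URL:          "https://destination.cozy.example",
			Token:        "access-token",
			ClientID:     "oauth-client-id",
			ClientSecret: "oauth-client-secret",
		},
	}
}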

// ExportCopyData does an HTTP copy of a part of the file indexes.
func ExportCopyData(w io.Writer, inst *instance.Instance, exportDoc *ExportDoc, archiver Archiver, cursor Cursor) error {
	zw := zip.NewWriter(w)
	defer func() {
		_ = zw.Close()
	}()

	if cursor.Number == 0 {
		err := copyJSONData(zw, inst, exportDoc, archiver)
		if err != nil {
			return err
		}
	}

	if exportDoc.AcceptDoctype(consts.Files) {
		if err := copyFiles(zw, inst, exportDoc, cursor); err != nil {
			return err
		}
	}

	if exportDoc.AcceptDoctype(consts.FilesVersions) {
		if err := copyVersions(zw, inst, exportDoc, cursor); err != nil {
			return err
		}
	}

	return nil
}

// copyJSONData re-compresses the CouchDB documents from the export tarball
// into the zip archive, under ExportDataDir.
func copyJSONData(zw *zip.Writer, inst *instance.Instance, exportDoc *ExportDoc, archiver Archiver) error {
	archive, err := archiver.OpenArchive(inst, exportDoc)
	if err != nil {
		return err
	}
	defer func() {
		_ = archive.Close()
	}()

	gr, err := gzip.NewReader(archive)
	if err != nil {
		return err
	}
	now := time.Now()
	tr := tar.NewReader(gr)
	defer func() {
		_ = gr.Close()
	}()

	for {
		header, err := tr.Next()
		if errors.Is(err, io.EOF) {
			break
		}
		if err != nil {
			return err
		}
		if header.Typeflag != tar.TypeReg {
			continue
		}

		zipHeader := &zip.FileHeader{
			Name:     path.Join(ExportDataDir, header.Name),
			Method:   zip.Deflate,
			Modified: now,
		}
		zipHeader.SetMode(0640)
		zipFileWriter, err := zw.CreateHeader(zipHeader)
		if err != nil {
			return err
		}
		_, err = io.Copy(zipFileWriter, tr)
		if err != nil {
			return err
		}
	}

	return nil
}

// copyFiles writes the metadata and the content of the files selected by the
// cursor into the zip archive.
func copyFiles(zw *zip.Writer, inst *instance.Instance, exportDoc *ExportDoc, cursor Cursor) error {
	files, err := listFilesFromCursor(inst, exportDoc, cursor)
	if err != nil {
		return err
	}

	fs := inst.VFS()
	filepather := vfs.NewFilePatherWithCache(fs)

	for _, file := range files {
		metaHeader := &zip.FileHeader{
			Name:     path.Join(ExportDataDir, consts.Files, file.DocID+".json"),
			Method:   zip.Deflate,
			Modified: file.UpdatedAt,
		}
		metaHeader.SetMode(0640)
		metaWriter, err := zw.CreateHeader(metaHeader)
		if err != nil {
			return err
		}
		doc, err := json.Marshal(file)
		if err != nil {
			return err
		}
		if _, err = metaWriter.Write(doc); err != nil {
			return err
		}

		f, err := fs.OpenFile(file)
		if err != nil {
			// Ignore a missing file: it may happen that a file is deleted
			// while an export is running, as we are not always locking the
			// VFS or blocking the instance (or the file system is not clean).
			continue
		}
		defer func() {
			_ = f.Close()
		}()
		fullpath, err := file.Path(filepather)
		if err != nil {
			return err
		}
		fileHeader := &zip.FileHeader{
			Name:     path.Join(ExportFilesDir, fullpath),
			Method:   zip.Deflate,
			Modified: file.UpdatedAt,
		}
		if file.Executable {
			fileHeader.SetMode(0750)
		} else {
			fileHeader.SetMode(0640)
		}
		zipFileWriter, err := zw.CreateHeader(fileHeader)
		if err != nil {
			return err
		}
		_, err = io.Copy(zipFileWriter, f)
		if err != nil {
			return err
		}
	}

	return nil
}

// copyVersions writes the metadata and the content of the old file versions
// selected by the cursor into the zip archive.
func copyVersions(zw *zip.Writer, inst *instance.Instance, exportDoc *ExportDoc, cursor Cursor) error {
	versions, err := listVersionsFromCursor(inst, exportDoc, cursor)
	if err != nil {
		return err
	}

	fs := inst.VFS()
	finder := newFileFinderWithCache(fs)
	for _, version := range versions {
		metaHeader := &zip.FileHeader{
			Name:     path.Join(ExportDataDir, consts.FilesVersions, version.DocID+".json"),
			Method:   zip.Deflate,
			Modified: version.UpdatedAt,
		}
		metaHeader.SetMode(0640)
		metaWriter, err := zw.CreateHeader(metaHeader)
		if err != nil {
			return err
		}
		doc, err := json.Marshal(version)
		if err != nil {
			return err
		}
		if _, err = metaWriter.Write(doc); err != nil {
			return err
		}

		file, err := finder.Find(version.DocID)
		if err != nil {
			// Ignore a missing file: it may happen that a file is deleted
			// while an export is running, as we are not always locking the
			// VFS or blocking the instance (or the file system is not clean).
			continue
		}

		f, err := fs.OpenFileVersion(file, version)
		if err != nil {
			// Ignore a missing version: it may happen that a version is
			// deleted while an export is running, as we are not always
			// locking the VFS or blocking the instance (or the file system
			// is not clean).
			continue
		}
		defer func() {
			_ = f.Close()
		}()
		fileHeader := &zip.FileHeader{
			Name:     path.Join(ExportVersionsDir, version.DocID),
			Method:   zip.Deflate,
			Modified: version.UpdatedAt,
		}
		fileHeader.SetMode(0640)
		zipFileWriter, err := zw.CreateHeader(fileHeader)
		if err != nil {
			return err
		}
		_, err = io.Copy(zipFileWriter, f)
		if err != nil {
			return err
		}
	}

	return nil
}
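
// readTarGzSequentially is a minimal sketch of the property the export format
// relies on: a .tar.gz archive can be consumed in a single pass from any
// io.Reader (for example an object streamed from Swift over HTTP), whereas a
// .zip needs an io.ReaderAt and the total size to locate its central
// directory. The visit callback is a hypothetical consumer of each regular
// entry; this helper is not used by the stack itself.
func readTarGzSequentially(r io.Reader, visit func(name string, content io.Reader) error) error {
	gr, err := gzip.NewReader(r)
	if err != nil {
		return err
	}
	defer func() {
		_ = gr.Close()
	}()
	tr := tar.NewReader(gr)
	for {
		header, err := tr.Next()
		if errors.Is(err, io.EOF) {
			return nil
		}
		if err != nil {
			return err
		}
		if header.Typeflag != tar.TypeReg {
			continue
		}
		// The tar.Reader exposes the current entry's content until the next
		// call to Next, so it can be handed to the callback directly.
		if err := visit(header.Name, tr); err != nil {
			return err
		}
	}
}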

// CreateExport is used to create a tarball with the data from an instance.
//
// Note: the tarball is a .tar.gz and not a .zip to allow streaming from Swift
// to the stack, and from the stack to the client, as a .tar.gz can be read
// sequentially while reading a .zip requires seeking.
func CreateExport(i *instance.Instance, opts ExportOptions, archiver Archiver) (*ExportDoc, error) {
	exportDoc := prepareExportDoc(i, opts)
	if err := exportDoc.CleanPreviousExports(archiver); err != nil {
		return nil, err
	}

	if err := couchdb.CreateDoc(prefixer.GlobalPrefixer, exportDoc); err != nil {
		return nil, err
	}
	realtime.GetHub().Publish(i, realtime.EventCreate, exportDoc.Clone(), nil)

	size, err := writeArchive(i, exportDoc, archiver)
	old := exportDoc.Clone()
	errf := exportDoc.MarksAsFinished(i, size, err)
	realtime.GetHub().Publish(i, realtime.EventUpdate, exportDoc, old)
	if err != nil {
		return nil, err
	}
	return exportDoc, errf
}

// writeArchive creates the archive with the archiver and fills it with the
// exported documents, returning the size written.
func writeArchive(i *instance.Instance, exportDoc *ExportDoc, archiver Archiver) (int64, error) {
	out, err := archiver.CreateArchive(exportDoc)
	if err != nil {
		return 0, err
	}
	size, err := writeArchiveContent(i, exportDoc, out)
	if err != nil {
		return 0, err
	}
	return size, out.Close()
}

// writeArchiveContent writes the documents to out as a gzipped tarball and
// returns the uncompressed size of the written documents.
func writeArchiveContent(i *instance.Instance, exportDoc *ExportDoc, out io.Writer) (int64, error) {
	gw, err := gzip.NewWriterLevel(out, gzip.BestCompression)
	if err != nil {
		return 0, err
	}
	tw := tar.NewWriter(gw)
	size, err := writeDocuments(i, exportDoc, tw)
	if err != nil {
		return 0, err
	}
	if err := tw.Close(); err != nil {
		return 0, err
	}
	if err := gw.Close(); err != nil {
		return 0, err
	}
	return size, nil
}

// writeDocuments writes the instance document and the CouchDB documents to
// the tarball, and returns the total size written.
func writeDocuments(i *instance.Instance, exportDoc *ExportDoc, tw *tar.Writer) (int64, error) {
	var size int64
	createdAt := exportDoc.CreatedAt

	n, err := writeInstanceDoc(i, "instance", createdAt, tw)
	if err != nil {
		return 0, err
	}
	size += n

	n, err = exportDocuments(i, exportDoc, createdAt, tw)
	if err != nil {
		return 0, err
	}
	size += n

	if exportDoc.AcceptDoctype(consts.Files) {
		n, err := exportFiles(i, exportDoc, tw)
		if err != nil {
			return 0, err
		}
		size += n
	}

	return size, nil
}

// exportFiles flushes the pending notes, writes the directory documents to
// the tarball, collects the sizes of the files and of their old versions, and
// computes the cursors used to split the file contents into parts.
func exportFiles(i *instance.Instance, exportDoc *ExportDoc, tw *tar.Writer) (int64, error) {
	_ = note.FlushPendings(i)

	var size int64
	filesizes := make(map[string]int64)
	err := vfs.Walk(i.VFS(), "/", func(fullpath string, dir *vfs.DirDoc, file *vfs.FileDoc, err error) error {
		if err != nil {
			return err
		}
		if dir != nil {
			n, err := writeDoc(consts.Files, dir.DocID, dir, exportDoc.CreatedAt, tw)
			size += n
			return err
		}
		filesizes[file.DocID] = file.ByteSize
		return nil
	})
	if err != nil {
		return 0, err
	}

	versionsizes := make(map[string]int64)
	err = couchdb.ForeachDocs(i, consts.FilesVersions, func(id string, raw json.RawMessage) error {
		var doc vfs.Version
		if err := json.Unmarshal(raw, &doc); err != nil {
			return err
		}
		versionsizes[id] = doc.ByteSize
		return nil
	})
	if err != nil {
		return 0, err
	}

	remaining := exportDoc.PartsSize
	var cursors []string
	cursors, remaining = splitFiles(exportDoc.PartsSize, remaining, filesizes, consts.Files)
	exportDoc.PartsCursors = cursors
	cursors, _ = splitFiles(exportDoc.PartsSize, remaining, versionsizes, consts.FilesVersions)
	if len(cursors) > 0 {
		exportDoc.PartsCursors = append(exportDoc.PartsCursors, cursors...)
	}
	return size, nil
}

// exportDocuments writes all the CouchDB documents to the tarball, except the
// files and versions doctypes, which are handled separately.
func exportDocuments(in *instance.Instance, doc *ExportDoc, now time.Time, tw *tar.Writer) (int64, error) {
	doctypes, err := couchdb.AllDoctypes(in)
	if err != nil {
		return 0, err
	}

	var size int64
	for _, doctype := range doctypes {
		if !doc.AcceptDoctype(doctype) {
			continue
		}
		switch doctype {
		case consts.Files, consts.FilesVersions:
			// We have code specific to those doctypes
			continue
		}
		dir := url.PathEscape(doctype)
		err := couchdb.ForeachDocs(in, doctype, func(id string, doc json.RawMessage) error {
			n, err := writeMarshaledDoc(dir, id, doc, now, tw)
			if err == nil {
				size += n
			}
			return err
		})
		if err != nil {
			return 0, err
		}
	}
	return size, nil
}

// writeInstanceDoc writes a sanitized copy of the instance document to the
// tarball, with the secrets and environment-specific fields stripped.
func writeInstanceDoc(in *instance.Instance, name string, now time.Time, tw *tar.Writer) (int64, error) {
	clone := in.Clone().(*instance.Instance)
	clone.PassphraseHash = nil
	clone.PassphraseResetToken = nil
	clone.PassphraseResetTime = nil
	clone.RegisterToken = nil
	clone.SessSecret = nil
	clone.OAuthSecret = nil
	clone.CLISecret = nil
	clone.SwiftLayout = 0
	clone.CouchCluster = 0
	clone.IndexViewsVersion = 0
	return writeDoc("", name, clone, now, tw)
}

// writeDoc marshals the data to JSON and writes it to the tarball.
func writeDoc(dir, name string, data interface{}, now time.Time, tw *tar.Writer) (int64, error) {
	doc, err := json.Marshal(data)
	if err != nil {
		return 0, err
	}
	return writeMarshaledDoc(dir, name, doc, now, tw)
}

// writeMarshaledDoc writes an already marshaled JSON document to the tarball
// and returns the number of bytes written.
func writeMarshaledDoc(dir, name string, doc json.RawMessage, now time.Time, tw *tar.Writer) (int64, error) {
	if tw == nil { // For testing purposes
		return 1, nil
	}

	hdr := &tar.Header{
		Name:     path.Join(dir, name+".json"),
		Mode:     0640,
		Size:     int64(len(doc)),
		Typeflag: tar.TypeReg,
		ModTime:  now,
	}
	if err := tw.WriteHeader(hdr); err != nil {
		return 0, err
	}
	n, err := tw.Write(doc)
	return int64(n), err
}

// SendExportFailureMail sends an email to the user when the export has failed.
func SendExportFailureMail(inst *instance.Instance) error {
	email := mail.Options{
		Mode:         mail.ModeFromStack,
		TemplateName: "export_error",
	}
	msg, err := job.NewMessage(&email)
	if err != nil {
		return err
	}
	_, err = job.System().PushJob(inst, &job.JobRequest{
		WorkerType: "sendmail",
		Message:    msg,
	})
	return err
}
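
// listZipPart is a minimal sketch of the consumer side of ExportCopyData: it
// lists the entries of one exported zip part. Unlike the .tar.gz archive read
// above, opening a zip requires an io.ReaderAt and the total size, which is
// why the stack streams tarballs internally and only builds zips for the
// parts it serves. This helper is illustrative and not used by the stack.
func listZipPart(ra io.ReaderAt, size int64) ([]string, error) {
	zr, err := zip.NewReader(ra, size)
	if err != nil {
		return nil, err
	}
	names := make([]string, 0, len(zr.File))
	for _, f := range zr.File {
		// Entries are prefixed with ExportDataDir, ExportFilesDir, or
		// ExportVersionsDir depending on what they contain.
		names = append(names, f.Name)
	}
	return names, nil
}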