github.com/cozy/cozy-stack@v0.0.0-20240603063001-31110fa4cae1/model/move/export.go (about)

     1  package move
     2  
     3  import (
     4  	"archive/tar"
     5  	"archive/zip"
     6  	"compress/gzip"
     7  	"encoding/json"
     8  	"errors"
     9  	"io"
    10  	"net/url"
    11  	"path"
    12  	"time"
    13  
    14  	"github.com/cozy/cozy-stack/model/instance"
    15  	"github.com/cozy/cozy-stack/model/job"
    16  	"github.com/cozy/cozy-stack/model/note"
    17  	"github.com/cozy/cozy-stack/model/vfs"
    18  	"github.com/cozy/cozy-stack/pkg/consts"
    19  	"github.com/cozy/cozy-stack/pkg/couchdb"
    20  	"github.com/cozy/cozy-stack/pkg/mail"
    21  	"github.com/cozy/cozy-stack/pkg/prefixer"
    22  	"github.com/cozy/cozy-stack/pkg/realtime"
    23  )
    24  
// ExportOptions contains the options for launching the export worker.
//
// The struct carries JSON tags so that it can be serialized; the fields tagged
// with omitempty are left out of the JSON when they hold their zero value
// (MoveTo is a pointer and is omitted when nil). MaxAge is a time.Duration,
// which encoding/json encodes as an integer number of nanoseconds.
//
// CreateExport hands these options to prepareExportDoc to build the ExportDoc
// of a new export. MoveTo describes the target Cozy when the export must be
// sent to another instance.
//
// NOTE(review): the precise meaning of the other fields (PartsSize,
// WithDoctypes, ContextualDomain, TokenSource, IgnoreVault, AdminReq) is
// decided by code that is not in this file — confirm there before relying on
// them.
type ExportOptions struct {
	PartsSize        int64          `json:"parts_size"`
	MaxAge           time.Duration  `json:"max_age"`
	WithDoctypes     []string       `json:"with_doctypes,omitempty"`
	ContextualDomain string         `json:"contextual_domain,omitempty"`
	TokenSource      string         `json:"token_source,omitempty"`
	IgnoreVault      bool           `json:"ignore_vault,omitempty"`
	MoveTo           *MoveToOptions `json:"move_to,omitempty"`
	AdminReq         bool           `json:"admin_req,omitempty"`
}
    36  
    37  // MoveToOptions is used when the export must be sent to another Cozy.
    38  type MoveToOptions struct {
    39  	URL          string `json:"url"`
    40  	Token        string `json:"token"`
    41  	ClientID     string `json:"client_id"`
    42  	ClientSecret string `json:"client_secret"`
    43  }
    44  
    45  // ImportsURL returns the URL on the target for sending the download link to
    46  // the export tarballs.
    47  func (m *MoveToOptions) ImportsURL() string {
    48  	u, err := url.Parse(m.URL)
    49  	if err != nil {
    50  		u, err = url.Parse("https://" + m.URL)
    51  	}
    52  	if err != nil {
    53  		return m.URL
    54  	}
    55  	u.Path = "/move/imports"
    56  	return u.String()
    57  }
    58  
// minimalPartsSize is the minimal size of a file bucket, to split the index
// into equal-sized parts. It is expressed in bytes: 1024^3, i.e. 1 GiB.
const minimalPartsSize = 1024 * 1024 * 1024 // 1 GiB
    62  
// Directories used as prefixes for the names of the entries in the export
// archive. The names are slash-separated and are joined with the path
// package.
const (
	// ExportDataDir is the directory for storing the documents from CouchDB in
	// the export archive. The JSON documents are written below it, in one
	// sub-directory per doctype.
	ExportDataDir = "My Cozy/Data"
	// ExportFilesDir is the directory for storing the content of the files in
	// the export archive.
	ExportFilesDir = "My Cozy/Files"
	// ExportVersionsDir is the directory for storing the content of the old
	// versions of the files in the export archive.
	ExportVersionsDir = "My Cozy/Versions"
)
    74  
    75  // ExportCopyData does an HTTP copy of a part of the file indexes.
    76  func ExportCopyData(w io.Writer, inst *instance.Instance, exportDoc *ExportDoc, archiver Archiver, cursor Cursor) error {
    77  	zw := zip.NewWriter(w)
    78  	defer func() {
    79  		_ = zw.Close()
    80  	}()
    81  
    82  	if cursor.Number == 0 {
    83  		err := copyJSONData(zw, inst, exportDoc, archiver)
    84  		if err != nil {
    85  			return err
    86  		}
    87  	}
    88  
    89  	if exportDoc.AcceptDoctype(consts.Files) {
    90  		if err := copyFiles(zw, inst, exportDoc, cursor); err != nil {
    91  			return err
    92  		}
    93  	}
    94  
    95  	if exportDoc.AcceptDoctype(consts.FilesVersions) {
    96  		if err := copyVersions(zw, inst, exportDoc, cursor); err != nil {
    97  			return err
    98  		}
    99  	}
   100  
   101  	return nil
   102  }
   103  
   104  func copyJSONData(zw *zip.Writer, inst *instance.Instance, exportDoc *ExportDoc, archiver Archiver) error {
   105  	archive, err := archiver.OpenArchive(inst, exportDoc)
   106  	if err != nil {
   107  		return err
   108  	}
   109  	defer func() {
   110  		_ = archive.Close()
   111  	}()
   112  
   113  	gr, err := gzip.NewReader(archive)
   114  	if err != nil {
   115  		return err
   116  	}
   117  	now := time.Now()
   118  	tr := tar.NewReader(gr)
   119  	defer func() {
   120  		_ = gr.Close()
   121  	}()
   122  
   123  	for {
   124  		header, err := tr.Next()
   125  		if errors.Is(err, io.EOF) {
   126  			break
   127  		}
   128  		if err != nil {
   129  			return err
   130  		}
   131  		if header.Typeflag != tar.TypeReg {
   132  			continue
   133  		}
   134  
   135  		zipHeader := &zip.FileHeader{
   136  			Name:     path.Join(ExportDataDir, header.Name),
   137  			Method:   zip.Deflate,
   138  			Modified: now,
   139  		}
   140  		zipHeader.SetMode(0640)
   141  		zipFileWriter, err := zw.CreateHeader(zipHeader)
   142  		if err != nil {
   143  			return err
   144  		}
   145  		_, err = io.Copy(zipFileWriter, tr)
   146  		if err != nil {
   147  			return err
   148  		}
   149  	}
   150  
   151  	return nil
   152  }
   153  
   154  func copyFiles(zw *zip.Writer, inst *instance.Instance, exportDoc *ExportDoc, cursor Cursor) error {
   155  	files, err := listFilesFromCursor(inst, exportDoc, cursor)
   156  	if err != nil {
   157  		return err
   158  	}
   159  
   160  	fs := inst.VFS()
   161  	filepather := vfs.NewFilePatherWithCache(fs)
   162  
   163  	for _, file := range files {
   164  		metaHeader := &zip.FileHeader{
   165  			Name:     path.Join(ExportDataDir, consts.Files, file.DocID+".json"),
   166  			Method:   zip.Deflate,
   167  			Modified: file.UpdatedAt,
   168  		}
   169  		metaHeader.SetMode(0640)
   170  		metaWriter, err := zw.CreateHeader(metaHeader)
   171  		if err != nil {
   172  			return err
   173  		}
   174  		doc, err := json.Marshal(file)
   175  		if err != nil {
   176  			return err
   177  		}
   178  		if _, err = metaWriter.Write(doc); err != nil {
   179  			return err
   180  		}
   181  
   182  		f, err := fs.OpenFile(file)
   183  		if err != nil {
   184  			// Ignore missing file, as it may happen that a file is deleted
   185  			// while an export is running as we are not always locking the
   186  			// VFS or blocking the instance (or the file system is not clean)
   187  			continue
   188  		}
   189  		defer func() {
   190  			_ = f.Close()
   191  		}()
   192  		fullpath, err := file.Path(filepather)
   193  		if err != nil {
   194  			return err
   195  		}
   196  		fileHeader := &zip.FileHeader{
   197  			Name:     path.Join(ExportFilesDir, fullpath),
   198  			Method:   zip.Deflate,
   199  			Modified: file.UpdatedAt,
   200  		}
   201  		if file.Executable {
   202  			fileHeader.SetMode(0750)
   203  		} else {
   204  			fileHeader.SetMode(0640)
   205  		}
   206  		zipFileWriter, err := zw.CreateHeader(fileHeader)
   207  		if err != nil {
   208  			return err
   209  		}
   210  		_, err = io.Copy(zipFileWriter, f)
   211  		if err != nil {
   212  			return err
   213  		}
   214  	}
   215  
   216  	return nil
   217  }
   218  
   219  func copyVersions(zw *zip.Writer, inst *instance.Instance, exportDoc *ExportDoc, cursor Cursor) error {
   220  	versions, err := listVersionsFromCursor(inst, exportDoc, cursor)
   221  	if err != nil {
   222  		return err
   223  	}
   224  
   225  	fs := inst.VFS()
   226  	finder := newFileFinderWithCache(fs)
   227  
   228  	for _, version := range versions {
   229  		metaHeader := &zip.FileHeader{
   230  			Name:     path.Join(ExportDataDir, consts.FilesVersions, version.DocID+".json"),
   231  			Method:   zip.Deflate,
   232  			Modified: version.UpdatedAt,
   233  		}
   234  		metaHeader.SetMode(0640)
   235  		metaWriter, err := zw.CreateHeader(metaHeader)
   236  		if err != nil {
   237  			return err
   238  		}
   239  		doc, err := json.Marshal(version)
   240  		if err != nil {
   241  			return err
   242  		}
   243  		if _, err = metaWriter.Write(doc); err != nil {
   244  			return err
   245  		}
   246  
   247  		file, err := finder.Find(version.DocID)
   248  		if err != nil {
   249  			// Ignore missing file, as it may happen that a file is deleted
   250  			// while an export is running as we are not always locking the
   251  			// VFS or blocking the instance (or the file system is not clean)
   252  			continue
   253  		}
   254  
   255  		f, err := fs.OpenFileVersion(file, version)
   256  		if err != nil {
   257  			// Ignore missing version, as it may happen that a version is
   258  			// deleted while an export is running as we are not always locking
   259  			// the VFS or blocking the instance (or the file system is not clean)
   260  			continue
   261  		}
   262  		defer func() {
   263  			_ = f.Close()
   264  		}()
   265  		fileHeader := &zip.FileHeader{
   266  			Name:     path.Join(ExportFilesDir, version.DocID),
   267  			Method:   zip.Deflate,
   268  			Modified: version.UpdatedAt,
   269  		}
   270  		fileHeader.SetMode(0640)
   271  		zipFileWriter, err := zw.CreateHeader(fileHeader)
   272  		if err != nil {
   273  			return err
   274  		}
   275  		_, err = io.Copy(zipFileWriter, f)
   276  		if err != nil {
   277  			return err
   278  		}
   279  	}
   280  
   281  	return nil
   282  }
   283  
   284  // CreateExport is used to create a tarball with the data from an instance.
   285  //
   286  // Note: the tarball is a .tar.gz and not a .zip to allow streaming from Swift
   287  // to the stack, and from the stack to the client, as .tar.gz can be read
   288  // sequentially and reading a .zip need to seek.
   289  func CreateExport(i *instance.Instance, opts ExportOptions, archiver Archiver) (*ExportDoc, error) {
   290  	exportDoc := prepareExportDoc(i, opts)
   291  	if err := exportDoc.CleanPreviousExports(archiver); err != nil {
   292  		return nil, err
   293  	}
   294  
   295  	if err := couchdb.CreateDoc(prefixer.GlobalPrefixer, exportDoc); err != nil {
   296  		return nil, err
   297  	}
   298  	realtime.GetHub().Publish(i, realtime.EventCreate, exportDoc.Clone(), nil)
   299  
   300  	size, err := writeArchive(i, exportDoc, archiver)
   301  	old := exportDoc.Clone()
   302  	errf := exportDoc.MarksAsFinished(i, size, err)
   303  	realtime.GetHub().Publish(i, realtime.EventUpdate, exportDoc, old)
   304  	if err != nil {
   305  		return nil, err
   306  	}
   307  	return exportDoc, errf
   308  }
   309  
   310  func writeArchive(i *instance.Instance, exportDoc *ExportDoc, archiver Archiver) (int64, error) {
   311  	out, err := archiver.CreateArchive(exportDoc)
   312  	if err != nil {
   313  		return 0, err
   314  	}
   315  	size, err := writeArchiveContent(i, exportDoc, out)
   316  	if err != nil {
   317  		return 0, err
   318  	}
   319  	return size, out.Close()
   320  }
   321  
   322  func writeArchiveContent(i *instance.Instance, exportDoc *ExportDoc, out io.Writer) (int64, error) {
   323  	gw, err := gzip.NewWriterLevel(out, gzip.BestCompression)
   324  	if err != nil {
   325  		return 0, err
   326  	}
   327  	tw := tar.NewWriter(gw)
   328  	size, err := writeDocuments(i, exportDoc, tw)
   329  	if err != nil {
   330  		return 0, err
   331  	}
   332  	if err := tw.Close(); err != nil {
   333  		return 0, err
   334  	}
   335  	if err := gw.Close(); err != nil {
   336  		return 0, err
   337  	}
   338  	return size, nil
   339  }
   340  
   341  func writeDocuments(i *instance.Instance, exportDoc *ExportDoc, tw *tar.Writer) (int64, error) {
   342  	var size int64
   343  	createdAt := exportDoc.CreatedAt
   344  
   345  	n, err := writeInstanceDoc(i, "instance", createdAt, tw)
   346  	if err != nil {
   347  		return 0, err
   348  	}
   349  	size += n
   350  
   351  	n, err = exportDocuments(i, exportDoc, createdAt, tw)
   352  	if err != nil {
   353  		return 0, err
   354  	}
   355  	size += n
   356  
   357  	if exportDoc.AcceptDoctype(consts.Files) {
   358  		n, err := exportFiles(i, exportDoc, tw)
   359  		if err != nil {
   360  			return 0, err
   361  		}
   362  		size += n
   363  	}
   364  
   365  	return size, nil
   366  }
   367  
   368  func exportFiles(i *instance.Instance, exportDoc *ExportDoc, tw *tar.Writer) (int64, error) {
   369  	_ = note.FlushPendings(i)
   370  
   371  	var size int64
   372  	filesizes := make(map[string]int64)
   373  	err := vfs.Walk(i.VFS(), "/", func(fullpath string, dir *vfs.DirDoc, file *vfs.FileDoc, err error) error {
   374  		if err != nil {
   375  			return err
   376  		}
   377  		if dir != nil {
   378  			n, err := writeDoc(consts.Files, dir.DocID, dir, exportDoc.CreatedAt, tw)
   379  			size += n
   380  			return err
   381  		}
   382  		filesizes[file.DocID] = file.ByteSize
   383  		return nil
   384  	})
   385  	if err != nil {
   386  		return 0, err
   387  	}
   388  
   389  	versionsizes := make(map[string]int64)
   390  	err = couchdb.ForeachDocs(i, consts.FilesVersions, func(id string, raw json.RawMessage) error {
   391  		var doc vfs.Version
   392  		if err := json.Unmarshal(raw, &doc); err != nil {
   393  			return err
   394  		}
   395  		versionsizes[id] = doc.ByteSize
   396  		return nil
   397  	})
   398  	if err != nil {
   399  		return 0, err
   400  	}
   401  
   402  	remaining := exportDoc.PartsSize
   403  	var cursors []string
   404  	cursors, remaining = splitFiles(exportDoc.PartsSize, remaining, filesizes, consts.Files)
   405  	exportDoc.PartsCursors = cursors
   406  	cursors, _ = splitFiles(exportDoc.PartsSize, remaining, versionsizes, consts.FilesVersions)
   407  	if len(cursors) > 0 {
   408  		exportDoc.PartsCursors = append(exportDoc.PartsCursors, cursors...)
   409  	}
   410  	return size, nil
   411  }
   412  
   413  func exportDocuments(in *instance.Instance, doc *ExportDoc, now time.Time, tw *tar.Writer) (int64, error) {
   414  	doctypes, err := couchdb.AllDoctypes(in)
   415  	if err != nil {
   416  		return 0, err
   417  	}
   418  
   419  	var size int64
   420  	for _, doctype := range doctypes {
   421  		if !doc.AcceptDoctype(doctype) {
   422  			continue
   423  		}
   424  		switch doctype {
   425  		case consts.Files, consts.FilesVersions:
   426  			// we have code specific to those doctypes
   427  			continue
   428  		}
   429  		dir := url.PathEscape(doctype)
   430  		err := couchdb.ForeachDocs(in, doctype, func(id string, doc json.RawMessage) error {
   431  			n, err := writeMarshaledDoc(dir, id, doc, now, tw)
   432  			if err == nil {
   433  				size += n
   434  			}
   435  			return err
   436  		})
   437  		if err != nil {
   438  			return 0, err
   439  		}
   440  	}
   441  	return size, nil
   442  }
   443  
   444  func writeInstanceDoc(in *instance.Instance, name string, now time.Time, tw *tar.Writer) (int64, error) {
   445  	clone := in.Clone().(*instance.Instance)
   446  	clone.PassphraseHash = nil
   447  	clone.PassphraseResetToken = nil
   448  	clone.PassphraseResetTime = nil
   449  	clone.RegisterToken = nil
   450  	clone.SessSecret = nil
   451  	clone.OAuthSecret = nil
   452  	clone.CLISecret = nil
   453  	clone.SwiftLayout = 0
   454  	clone.CouchCluster = 0
   455  	clone.IndexViewsVersion = 0
   456  	return writeDoc("", name, clone, now, tw)
   457  }
   458  
   459  func writeDoc(dir, name string, data interface{}, now time.Time, tw *tar.Writer) (int64, error) {
   460  	doc, err := json.Marshal(data)
   461  	if err != nil {
   462  		return 0, err
   463  	}
   464  	return writeMarshaledDoc(dir, name, doc, now, tw)
   465  }
   466  
   467  func writeMarshaledDoc(dir, name string, doc json.RawMessage, now time.Time, tw *tar.Writer) (int64, error) {
   468  	if tw == nil { // For testing purpose
   469  		return 1, nil
   470  	}
   471  
   472  	hdr := &tar.Header{
   473  		Name:     path.Join(dir, name+".json"),
   474  		Mode:     0640,
   475  		Size:     int64(len(doc)),
   476  		Typeflag: tar.TypeReg,
   477  		ModTime:  now,
   478  	}
   479  	if err := tw.WriteHeader(hdr); err != nil {
   480  		return 0, err
   481  	}
   482  	n, err := tw.Write(doc)
   483  	return int64(n), err
   484  }
   485  
   486  // SendExportFailureMail sends an email to the user when the export has failed.
   487  func SendExportFailureMail(inst *instance.Instance) error {
   488  	email := mail.Options{
   489  		Mode:         mail.ModeFromStack,
   490  		TemplateName: "export_error",
   491  	}
   492  	msg, err := job.NewMessage(&email)
   493  	if err != nil {
   494  		return err
   495  	}
   496  	_, err = job.System().PushJob(inst, &job.JobRequest{
   497  		WorkerType: "sendmail",
   498  		Message:    msg,
   499  	})
   500  	return err
   501  }