github.com/juju/juju@v0.0.0-20240430160146-1752b71fcf00/state/backups/archive.go (about)

     1  // Copyright 2014 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package backups
     5  
     6  import (
     7  	"bytes"
     8  	"compress/gzip"
     9  	"io"
    10  	"os"
    11  	"path"
    12  	"path/filepath"
    13  
    14  	"github.com/juju/errors"
    15  	"github.com/juju/utils/v3/tar"
    16  	"github.com/juju/version/v2"
    17  )
    18  
// Relative locations of the notable files and directories inside a
// backup archive. All of them live under the single top-level content
// directory; see ArchivePaths for how they are composed into paths.
const (
	contentDir   = "juju-backup"
	filesBundle  = "root.tar"
	dbDumpDir    = "dump"
	metadataFile = "metadata.json"
)
    25  
    26  var legacyVersion = version.Number{Major: 1, Minor: 20}
    27  
// ArchivePaths holds the paths to the files and directories in a
// backup archive.
type ArchivePaths struct {
	// ContentDir is the path to the directory within the archive
	// containing all the contents. It is the only file or directory at
	// the top-level of the archive and everything else in the archive
	// is contained in the content directory.
	ContentDir string

	// FilesBundle is the path to the tar file inside the archive
	// containing all the state-related files (with the exception of the
	// DB dump files) gathered in by the backup machinery.
	FilesBundle string

	// DBDumpDir is the path to the directory within the archive
	// contents that contains all the files dumped from the juju state
	// database.
	DBDumpDir string

	// MetadataFile is the path to the JSON metadata file describing
	// the backup (see Metadata).
	MetadataFile string
}
    50  
    51  // NewCanonicalArchivePaths composes a new ArchivePaths with default
    52  // values set. These values are relative (un-rooted) and the canonical
    53  // slash ("/") is the path separator. Thus the paths are suitable for
    54  // resolving the paths in a backup archive file (which is a tar file).
    55  func NewCanonicalArchivePaths() ArchivePaths {
    56  	return ArchivePaths{
    57  		ContentDir:   contentDir,
    58  		FilesBundle:  path.Join(contentDir, filesBundle),
    59  		DBDumpDir:    path.Join(contentDir, dbDumpDir),
    60  		MetadataFile: path.Join(contentDir, metadataFile),
    61  	}
    62  }
    63  
    64  // NewNonCanonicalArchivePaths builds a new ArchivePaths using default
    65  // values, rooted at the provided rootDir. The path separator used is
    66  // platform-dependent. The resulting paths are suitable for locating
    67  // backup archive contents in a directory into which an archive has
    68  // been unpacked.
    69  func NewNonCanonicalArchivePaths(rootDir string) ArchivePaths {
    70  	return ArchivePaths{
    71  		ContentDir:   filepath.Join(rootDir, contentDir),
    72  		FilesBundle:  filepath.Join(rootDir, contentDir, filesBundle),
    73  		DBDumpDir:    filepath.Join(rootDir, contentDir, dbDumpDir),
    74  		MetadataFile: filepath.Join(rootDir, contentDir, metadataFile),
    75  	}
    76  }
    77  
// ArchiveWorkspace is a wrapper around backup archive info that has a
// concrete root directory and an archive unpacked in it. The embedded
// ArchivePaths are non-canonical, rooted at RootDir. Call Close to
// remove the workspace directory when done.
type ArchiveWorkspace struct {
	ArchivePaths
	RootDir string
}
    84  
    85  func newArchiveWorkspace() (*ArchiveWorkspace, error) {
    86  	rootdir, err := os.MkdirTemp("", "juju-backups-")
    87  	if err != nil {
    88  		return nil, errors.Annotate(err, "while creating workspace dir")
    89  	}
    90  
    91  	ws := ArchiveWorkspace{
    92  		ArchivePaths: NewNonCanonicalArchivePaths(rootdir),
    93  		RootDir:      rootdir,
    94  	}
    95  	return &ws, nil
    96  }
    97  
    98  // NewArchiveWorkspaceReader returns a new archive workspace with a new
    99  // workspace dir populated from the archive. Note that this involves
   100  // unpacking the entire archive into a directory under the host's
   101  // "temporary" directory. For relatively large archives this could have
   102  // adverse effects on hosts with little disk space.
   103  func NewArchiveWorkspaceReader(archive io.Reader) (*ArchiveWorkspace, error) {
   104  	ws, err := newArchiveWorkspace()
   105  	if err != nil {
   106  		return nil, errors.Trace(err)
   107  	}
   108  	err = unpackCompressedReader(ws.RootDir, archive)
   109  	return ws, errors.Trace(err)
   110  }
   111  
   112  func unpackCompressedReader(targetDir string, tarFile io.Reader) error {
   113  	tarFile, err := gzip.NewReader(tarFile)
   114  	if err != nil {
   115  		return errors.Annotate(err, "while uncompressing archive file")
   116  	}
   117  	err = tar.UntarFiles(tarFile, targetDir)
   118  	return errors.Trace(err)
   119  }
   120  
   121  // Close cleans up the workspace dir.
   122  func (ws *ArchiveWorkspace) Close() error {
   123  	err := os.RemoveAll(ws.RootDir)
   124  	return errors.Trace(err)
   125  }
   126  
   127  // UnpackFilesBundle unpacks the archived files bundle into the targeted dir.
   128  func (ws *ArchiveWorkspace) UnpackFilesBundle(targetRoot string) error {
   129  	tarFile, err := os.Open(ws.FilesBundle)
   130  	if err != nil {
   131  		return errors.Trace(err)
   132  	}
   133  	defer func() { _ = tarFile.Close() }()
   134  
   135  	err = tar.UntarFiles(tarFile, targetRoot)
   136  	return errors.Trace(err)
   137  }
   138  
   139  // OpenBundledFile returns an open ReadCloser for the corresponding file in
   140  // the archived files bundle.
   141  func (ws *ArchiveWorkspace) OpenBundledFile(filename string) (io.Reader, error) {
   142  	if filepath.IsAbs(filename) {
   143  		return nil, errors.Errorf("filename must be relative, got %q", filename)
   144  	}
   145  
   146  	tarFile, err := os.Open(ws.FilesBundle)
   147  	if err != nil {
   148  		return nil, errors.Trace(err)
   149  	}
   150  
   151  	_, file, err := tar.FindFile(tarFile, filename)
   152  	if err != nil {
   153  		_ = tarFile.Close()
   154  		return nil, errors.Trace(err)
   155  	}
   156  	return file, nil
   157  }
   158  
   159  // Metadata returns the metadata derived from the JSON file in the archive.
   160  func (ws *ArchiveWorkspace) Metadata() (*Metadata, error) {
   161  	metaFile, err := os.Open(ws.MetadataFile)
   162  	if err != nil {
   163  		return nil, errors.Trace(err)
   164  	}
   165  	defer func() { _ = metaFile.Close() }()
   166  
   167  	meta, err := NewMetadataJSONReader(metaFile)
   168  	return meta, errors.Trace(err)
   169  }
   170  
// ArchiveData is a wrapper around the uncompressed data in a backup
// archive file. It provides access to the content of the archive. While
// ArchiveData provides useful functionality, it may not be appropriate
// for large archives. The contents of the archive are kept in-memory,
// so large archives could be too taxing on the host. In that case
// consider using ArchiveWorkspace instead.
type ArchiveData struct {
	// ArchivePaths are canonical (slash-separated, un-rooted) paths
	// used to locate entries within the in-memory tar data.
	ArchivePaths
	// data is the entire uncompressed tar archive held in memory.
	data []byte
}
   181  
   182  // NewArchiveData builds a new archive data wrapper for the given
   183  // uncompressed data.
   184  func NewArchiveData(data []byte) *ArchiveData {
   185  	return &ArchiveData{
   186  		ArchivePaths: NewCanonicalArchivePaths(),
   187  		data:         data,
   188  	}
   189  }
   190  
   191  // NewArchiveDataReader returns a new archive data wrapper for the data in
   192  // the provided reader. Note that the entire archive will be read into
   193  // memory and kept there. So for relatively large archives it will often
   194  // be more appropriate to use ArchiveWorkspace instead.
   195  func NewArchiveDataReader(r io.Reader) (*ArchiveData, error) {
   196  	gzr, err := gzip.NewReader(r)
   197  	if err != nil {
   198  		return nil, errors.Trace(err)
   199  	}
   200  	defer func() { _ = gzr.Close() }()
   201  
   202  	data, err := io.ReadAll(gzr)
   203  	if err != nil {
   204  		return nil, errors.Trace(err)
   205  	}
   206  
   207  	return NewArchiveData(data), nil
   208  }
   209  
   210  // NewBuffer wraps the archive data in a Buffer.
   211  func (ad *ArchiveData) NewBuffer() *bytes.Buffer {
   212  	return bytes.NewBuffer(ad.data)
   213  }
   214  
   215  // Metadata returns the metadata stored in the backup archive.  If no
   216  // metadata is there, errors.NotFound is returned.
   217  func (ad *ArchiveData) Metadata() (*Metadata, error) {
   218  	buf := ad.NewBuffer()
   219  	_, metaFile, err := tar.FindFile(buf, ad.MetadataFile)
   220  	if err != nil {
   221  		return nil, errors.Trace(err)
   222  	}
   223  
   224  	meta, err := NewMetadataJSONReader(metaFile)
   225  	return meta, errors.Trace(err)
   226  }
   227  
   228  // Version returns the juju version under which the backup archive
   229  // was created.  If no version is found in the archive, it must come
   230  // from before backup archives included the version.  In that case we
   231  // return version 1.20.
   232  func (ad *ArchiveData) Version() (*version.Number, error) {
   233  	meta, err := ad.Metadata()
   234  	if errors.IsNotFound(err) {
   235  		return &legacyVersion, nil
   236  	}
   237  	if err != nil {
   238  		return nil, errors.Trace(err)
   239  	}
   240  
   241  	return &meta.Origin.Version, nil
   242  }