// github.com/juju/juju@v0.0.0-20240430160146-1752b71fcf00/state/backups/archive.go

// Copyright 2014 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package backups

import (
	"bytes"
	"compress/gzip"
	"io"
	"os"
	"path"
	"path/filepath"

	"github.com/juju/errors"
	"github.com/juju/utils/v3/tar"
	"github.com/juju/version/v2"
)

const (
	contentDir   = "juju-backup"
	filesBundle  = "root.tar"
	dbDumpDir    = "dump"
	metadataFile = "metadata.json"
)

var legacyVersion = version.Number{Major: 1, Minor: 20}

// ArchivePaths holds the paths to the files and directories in a
// backup archive.
type ArchivePaths struct {
	// ContentDir is the path to the directory within the archive
	// containing all the contents. It is the only file or directory at
	// the top level of the archive; everything else in the archive
	// is contained in the content directory.
	ContentDir string

	// FilesBundle is the path to the tar file inside the archive
	// containing all the state-related files (with the exception of the
	// DB dump files) gathered by the backup machinery.
	FilesBundle string

	// DBDumpDir is the path to the directory within the archive
	// contents that contains all the files dumped from the juju state
	// database.
	DBDumpDir string

	// MetadataFile is the path to the metadata file.
	MetadataFile string
}

// NewCanonicalArchivePaths composes a new ArchivePaths with default
// values set. These values are relative (un-rooted) and the canonical
// slash ("/") is the path separator. Thus the paths are suitable for
// resolving the paths in a backup archive file (which is a tar file).
func NewCanonicalArchivePaths() ArchivePaths {
	return ArchivePaths{
		ContentDir:   contentDir,
		FilesBundle:  path.Join(contentDir, filesBundle),
		DBDumpDir:    path.Join(contentDir, dbDumpDir),
		MetadataFile: path.Join(contentDir, metadataFile),
	}
}

// NewNonCanonicalArchivePaths builds a new ArchivePaths using default
// values, rooted at the provided rootDir. The path separator used is
// platform-dependent. The resulting paths are suitable for locating
// backup archive contents in a directory into which an archive has
// been unpacked.
func NewNonCanonicalArchivePaths(rootDir string) ArchivePaths {
	return ArchivePaths{
		ContentDir:   filepath.Join(rootDir, contentDir),
		FilesBundle:  filepath.Join(rootDir, contentDir, filesBundle),
		DBDumpDir:    filepath.Join(rootDir, contentDir, dbDumpDir),
		MetadataFile: filepath.Join(rootDir, contentDir, metadataFile),
	}
}
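
// examplePaths is an illustrative sketch, not part of the original file,
// contrasting the two path flavours: canonical paths address entries inside
// the tar stream with "/" separators, while non-canonical paths use the
// platform separator to locate the same content once it has been unpacked
// on disk. The root directory below is hypothetical.
func examplePaths() {
	canonical := NewCanonicalArchivePaths()
	// canonical.MetadataFile is always "juju-backup/metadata.json".
	_ = canonical.MetadataFile

	unpacked := NewNonCanonicalArchivePaths("/tmp/juju-backups-123")
	// unpacked.MetadataFile is rooted and platform-dependent, e.g.
	// "/tmp/juju-backups-123/juju-backup/metadata.json" on Linux.
	_ = unpacked.MetadataFile
}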

// ArchiveWorkspace is a wrapper around backup archive info that has a
// concrete root directory and an archive unpacked in it.
type ArchiveWorkspace struct {
	ArchivePaths
	RootDir string
}

func newArchiveWorkspace() (*ArchiveWorkspace, error) {
	rootdir, err := os.MkdirTemp("", "juju-backups-")
	if err != nil {
		return nil, errors.Annotate(err, "while creating workspace dir")
	}

	ws := ArchiveWorkspace{
		ArchivePaths: NewNonCanonicalArchivePaths(rootdir),
		RootDir:      rootdir,
	}
	return &ws, nil
}

// NewArchiveWorkspaceReader returns a new archive workspace with a new
// workspace dir populated from the archive. Note that this involves
// unpacking the entire archive into a directory under the host's
// "temporary" directory. For relatively large archives this could have
// adverse effects on hosts with little disk space.
func NewArchiveWorkspaceReader(archive io.Reader) (*ArchiveWorkspace, error) {
	ws, err := newArchiveWorkspace()
	if err != nil {
		return nil, errors.Trace(err)
	}
	err = unpackCompressedReader(ws.RootDir, archive)
	return ws, errors.Trace(err)
}

func unpackCompressedReader(targetDir string, tarFile io.Reader) error {
	tarFile, err := gzip.NewReader(tarFile)
	if err != nil {
		return errors.Annotate(err, "while uncompressing archive file")
	}
	err = tar.UntarFiles(tarFile, targetDir)
	return errors.Trace(err)
}

// Close cleans up the workspace dir.
func (ws *ArchiveWorkspace) Close() error {
	err := os.RemoveAll(ws.RootDir)
	return errors.Trace(err)
}

// UnpackFilesBundle unpacks the archived files bundle into the target dir.
func (ws *ArchiveWorkspace) UnpackFilesBundle(targetRoot string) error {
	tarFile, err := os.Open(ws.FilesBundle)
	if err != nil {
		return errors.Trace(err)
	}
	defer func() { _ = tarFile.Close() }()

	err = tar.UntarFiles(tarFile, targetRoot)
	return errors.Trace(err)
}

// OpenBundledFile returns an open io.Reader for the corresponding file in
// the archived files bundle. Note that the underlying bundle file stays
// open for the life of the returned reader.
func (ws *ArchiveWorkspace) OpenBundledFile(filename string) (io.Reader, error) {
	if filepath.IsAbs(filename) {
		return nil, errors.Errorf("filename must be relative, got %q", filename)
	}

	tarFile, err := os.Open(ws.FilesBundle)
	if err != nil {
		return nil, errors.Trace(err)
	}

	_, file, err := tar.FindFile(tarFile, filename)
	if err != nil {
		_ = tarFile.Close()
		return nil, errors.Trace(err)
	}
	return file, nil
}

// Metadata returns the metadata derived from the JSON file in the archive.
func (ws *ArchiveWorkspace) Metadata() (*Metadata, error) {
	metaFile, err := os.Open(ws.MetadataFile)
	if err != nil {
		return nil, errors.Trace(err)
	}
	defer func() { _ = metaFile.Close() }()

	meta, err := NewMetadataJSONReader(metaFile)
	return meta, errors.Trace(err)
}
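
// exampleWorkspaceRestore is an illustrative sketch, not part of the
// original file, of the typical workspace lifecycle: unpack a compressed
// archive into a temporary directory, read its metadata, restore the
// bundled files, and clean up. Both arguments are supplied by the caller;
// the names here are placeholders.
func exampleWorkspaceRestore(archive io.Reader, restoreRoot string) (*Metadata, error) {
	ws, err := NewArchiveWorkspaceReader(archive)
	if err != nil {
		return nil, errors.Trace(err)
	}
	// Close removes the temporary workspace directory.
	defer func() { _ = ws.Close() }()

	meta, err := ws.Metadata()
	if err != nil {
		return nil, errors.Trace(err)
	}

	if err := ws.UnpackFilesBundle(restoreRoot); err != nil {
		return nil, errors.Trace(err)
	}
	return meta, nil
}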

// ArchiveData is a wrapper around the uncompressed data in a backup
// archive file. It provides access to the content of the archive. While
// ArchiveData provides useful functionality, it may not be appropriate
// for large archives. The contents of the archive are kept in memory,
// so large archives could be too taxing on the host. In that case,
// consider using ArchiveWorkspace instead.
type ArchiveData struct {
	ArchivePaths
	data []byte
}

// NewArchiveData builds a new archive data wrapper for the given
// uncompressed data.
func NewArchiveData(data []byte) *ArchiveData {
	return &ArchiveData{
		ArchivePaths: NewCanonicalArchivePaths(),
		data:         data,
	}
}

// NewArchiveDataReader returns a new archive data wrapper for the data in
// the provided reader. Note that the entire archive will be read into
// memory and kept there. So for relatively large archives it will often
// be more appropriate to use ArchiveWorkspace instead.
func NewArchiveDataReader(r io.Reader) (*ArchiveData, error) {
	gzr, err := gzip.NewReader(r)
	if err != nil {
		return nil, errors.Trace(err)
	}
	defer func() { _ = gzr.Close() }()

	data, err := io.ReadAll(gzr)
	if err != nil {
		return nil, errors.Trace(err)
	}

	return NewArchiveData(data), nil
}

// NewBuffer wraps the archive data in a bytes.Buffer.
func (ad *ArchiveData) NewBuffer() *bytes.Buffer {
	return bytes.NewBuffer(ad.data)
}

// Metadata returns the metadata stored in the backup archive. If no
// metadata is present, an error satisfying errors.IsNotFound is returned.
func (ad *ArchiveData) Metadata() (*Metadata, error) {
	buf := ad.NewBuffer()
	_, metaFile, err := tar.FindFile(buf, ad.MetadataFile)
	if err != nil {
		return nil, errors.Trace(err)
	}

	meta, err := NewMetadataJSONReader(metaFile)
	return meta, errors.Trace(err)
}

// Version returns the juju version under which the backup archive
// was created. If no version is found in the archive, it must come
// from before backup archives included the version. In that case we
// return version 1.20.
func (ad *ArchiveData) Version() (*version.Number, error) {
	meta, err := ad.Metadata()
	if errors.IsNotFound(err) {
		return &legacyVersion, nil
	}
	if err != nil {
		return nil, errors.Trace(err)
	}

	return &meta.Origin.Version, nil
}
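
// exampleVersionCheck is an illustrative sketch, not part of the original
// file, showing the in-memory path: the whole compressed archive is read
// into memory via NewArchiveDataReader, after which the creating Juju
// version can be extracted (falling back to 1.20 for legacy archives
// without metadata).
func exampleVersionCheck(r io.Reader) (*version.Number, error) {
	ad, err := NewArchiveDataReader(r)
	if err != nil {
		return nil, errors.Trace(err)
	}
	return ad.Version()
}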