// Source: github.com/opencontainers/umoci@v0.4.8-0.20240508124516-656e4836fb0d/oci/layer/tar_generate.go

/*
 * umoci: Umoci Modifies Open Containers' Images
 * Copyright (C) 2016-2020 SUSE LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package layer

import (
	"archive/tar"
	"io"
	"os"
	"path/filepath"
	"strings"

	"github.com/apex/log"
	"github.com/opencontainers/umoci/pkg/fseval"
	"github.com/opencontainers/umoci/pkg/system"
	"github.com/opencontainers/umoci/pkg/testutils"
	"github.com/pkg/errors"
	"golang.org/x/sys/unix"
)

// ignoreXattrs is the set of xattr names that should be ignored when
// creating a new image layer, because they are host-specific and/or would be a
// bad idea to unpack. They are also excluded from Lclearxattr when extracting
// an archive.
//
// XXX: Maybe we should make this configurable so users can manually blacklist
// (or even whitelist) xattrs that they actually want included? Like how GNU
// tar's xattr setup works.
var ignoreXattrs = map[string]struct{}{
	// SELinux doesn't allow you to set SELinux policies generically. They're
	// also host-specific. So just ignore them during extraction.
	"security.selinux": {},

	// NFSv4 ACLs are very system-specific and shouldn't be touched by us, nor
	// should they be included in images.
	"system.nfs4_acl": {},

	// In order to support overlayfs whiteout mode, we shouldn't un-set
	// this after we've set it when writing out the whiteouts.
	"trusted.overlay.opaque": {},

	// We don't want to include these xattrs in the image, because they're
	// only relevant based on how the build overlay is constructed and will
	// not be true on the target system once the image is unpacked (e.g.
	// inodes might be different, impure status won't be true, etc.).
	"trusted.overlay.redirect": {},
	"trusted.overlay.origin":   {},
	"trusted.overlay.impure":   {},
	"trusted.overlay.nlink":    {},
	"trusted.overlay.upper":    {},
	"trusted.overlay.metacopy": {},
}

// init registers an extra ignored xattr when running inside a test binary.
func init() {
	// For test purposes we add a fake forbidden attribute that an unprivileged
	// user can easily write to (and thus we can test it).
	if testutils.IsTestBinary() {
		ignoreXattrs["user.UMOCI:forbidden_xattr"] = struct{}{}
	}
}

// tarGenerator is a helper for generating layer diff tars. It should be noted
// that when using tarGenerator.Add{File,Whiteout} it is recommended to do it
// in lexicographic order.
type tarGenerator struct {
	// tw is the tar writer that all generated entries are written to.
	tw *tar.Writer

	// mapOptions is the set of mapping options for modifying entries before
	// they're added to the layer.
	mapOptions MapOptions

	// Hardlink mapping: inode number -> archive name of the first entry that
	// was written for that inode. Later entries with the same inode number are
	// emitted as hardlinks to that name.
	inodes map[uint64]string

	// fsEval is the fseval.FsEval used to read the source files (stat,
	// readlink, xattrs and contents) that are added to the archive.
	fsEval fseval.FsEval

	// XXX: Should we add a safety check to make sure we don't generate two of
	//      the same path in a tar archive? This is not permitted by the spec.
}

// newTarGenerator creates a new tarGenerator using the provided writer as the
// output writer.
98 func newTarGenerator(w io.Writer, opt MapOptions) *tarGenerator { 99 fsEval := fseval.Default 100 if opt.Rootless { 101 fsEval = fseval.Rootless 102 } 103 104 return &tarGenerator{ 105 tw: tar.NewWriter(w), 106 mapOptions: opt, 107 inodes: map[uint64]string{}, 108 fsEval: fsEval, 109 } 110 } 111 112 // normalise converts the provided pathname to a POSIX-compliant pathname. It also will provide an error if a path looks unsafe. 113 func normalise(rawPath string, isDir bool) (string, error) { 114 // Clean up the path. 115 path := CleanPath(rawPath) 116 117 // Nothing to do. 118 if path == "." { 119 return ".", nil 120 } 121 122 if filepath.IsAbs(path) { 123 path = strings.TrimPrefix(path, "/") 124 } 125 126 // Check that the path is "safe", meaning that it doesn't resolve outside 127 // of the tar archive. While this might seem paranoid, it is a legitimate 128 // concern. 129 if "/"+path != filepath.Join("/", path) { 130 return "", errors.Errorf("escape warning: generated path is outside tar root: %s", rawPath) 131 } 132 133 // With some other tar formats, you needed to have a '/' at the end of a 134 // pathname in order to state that it is a directory. While this is no 135 // longer necessary, some older tooling may assume that. 136 if isDir { 137 path += "/" 138 } 139 140 return path, nil 141 } 142 143 // AddFile adds a file from the filesystem to the tar archive. It copies all of 144 // the relevant stat information about the file, and also attempts to track 145 // hardlinks. This should be functionally equivalent to adding entries with GNU 146 // tar. 
147 func (tg *tarGenerator) AddFile(name, path string) error { 148 fi, err := tg.fsEval.Lstat(path) 149 if err != nil { 150 return errors.Wrap(err, "add file lstat") 151 } 152 153 linkname := "" 154 if fi.Mode()&os.ModeSymlink == os.ModeSymlink { 155 if linkname, err = tg.fsEval.Readlink(path); err != nil { 156 return errors.Wrap(err, "add file readlink") 157 } 158 } 159 160 hdr, err := tar.FileInfoHeader(fi, linkname) 161 if err != nil { 162 return errors.Wrap(err, "convert fi to hdr") 163 } 164 hdr.Xattrs = map[string]string{} 165 // Usually incorrect for containers and was added in Go 1.10 causing 166 // changes to our output on a compiler bump... 167 hdr.Uname = "" 168 hdr.Gname = "" 169 170 name, err = normalise(name, fi.IsDir()) 171 if err != nil { 172 return errors.Wrap(err, "normalise path") 173 } 174 hdr.Name = name 175 176 // Make sure that we don't include any files with the name ".wh.". This 177 // will almost certainly confuse some users (unfortunately) but there's 178 // nothing we can do to store such files on-disk. 179 if strings.HasPrefix(filepath.Base(name), whPrefix) { 180 return errors.Errorf("invalid path has whiteout prefix %q: %s", whPrefix, name) 181 } 182 183 // FIXME: Do we need to ensure that the parent paths have all been added to 184 // the archive? I haven't found any tar specification that makes 185 // this mandatory, but I have a feeling that some people might rely 186 // on it. The issue with implementing it is that we'd have to get 187 // the FileInfo about the directory from somewhere (and we don't 188 // want to waste space by adding an entry that will be overwritten 189 // later). 190 191 // Different systems have different special things they need to set within 192 // a tar header. For example, device numbers are quite important to be set 193 // by us. 
194 statx, err := tg.fsEval.Lstatx(path) 195 if err != nil { 196 return errors.Wrapf(err, "lstatx %q", path) 197 } 198 updateHeader(hdr, statx) 199 200 // Set up xattrs externally to updateHeader because the function signature 201 // would look really dumb otherwise. 202 // XXX: This should probably be moved to a function in tar_unix.go. 203 names, err := tg.fsEval.Llistxattr(path) 204 if err != nil { 205 if errors.Cause(err) != unix.EOPNOTSUPP { 206 return errors.Wrap(err, "get xattr list") 207 } 208 names = []string{} 209 } 210 for _, name := range names { 211 // Some xattrs need to be skipped for sanity reasons, such as 212 // security.selinux, because they are very much host-specific and 213 // carrying them to other hosts would be a really bad idea. 214 if _, ignore := ignoreXattrs[name]; ignore { 215 continue 216 } 217 // TODO: We should translate all v3 capabilities into root-owned 218 // capabilities here. But we don't have Go code for that yet 219 // (we'd need to use libcap to parse it). 220 value, err := tg.fsEval.Lgetxattr(path, name) 221 if err != nil { 222 v := errors.Cause(err) 223 log.Debugf("failure reading xattr from list on %q: %q", name, v) 224 if v != unix.EOPNOTSUPP && v != unix.ENODATA { 225 // XXX: I'm not sure if we're unprivileged whether Lgetxattr can 226 // fail with EPERM. If it can, we should ignore it (like when 227 // we try to clear xattrs). 228 return errors.Wrapf(err, "get xattr: %s", name) 229 } 230 } 231 // https://golang.org/issues/20698 -- We don't just error out here 232 // because it's not _really_ a fatal error. Currently it's unclear 233 // whether the stdlib will correctly handle reading or disable writing 234 // of these PAX headers so we have to track this ourselves. 
235 if len(value) <= 0 { 236 log.Warnf("ignoring empty-valued xattr %s: disallowed by PAX standard", name) 237 continue 238 } 239 // Note that Go strings can actually be arbitrary byte sequences, so 240 // this conversion (while it might look a bit wrong) is actually fine. 241 hdr.Xattrs[name] = string(value) 242 } 243 244 // Not all systems have the concept of an inode, but I'm not in the mood to 245 // handle this in a way that makes anything other than GNU/Linux happy 246 // right now. Handle hardlinks. 247 if oldpath, ok := tg.inodes[statx.Ino]; ok { 248 // We just hit a hardlink, so we just have to change the header. 249 hdr.Typeflag = tar.TypeLink 250 hdr.Linkname = oldpath 251 hdr.Size = 0 252 } else { 253 tg.inodes[statx.Ino] = name 254 } 255 256 // Apply any header mappings. 257 if err := mapHeader(hdr, tg.mapOptions); err != nil { 258 return errors.Wrap(err, "map header") 259 } 260 if err := tg.tw.WriteHeader(hdr); err != nil { 261 return errors.Wrap(err, "write header") 262 } 263 264 // Write the contents of regular files. 265 if hdr.Typeflag == tar.TypeReg { 266 fh, err := tg.fsEval.Open(path) 267 if err != nil { 268 return errors.Wrap(err, "open file") 269 } 270 defer fh.Close() 271 272 n, err := system.Copy(tg.tw, fh) 273 if err != nil { 274 return errors.Wrap(err, "copy to layer") 275 } 276 if n != hdr.Size { 277 return errors.Wrap(io.ErrShortWrite, "copy to layer") 278 } 279 } 280 281 return nil 282 } 283 284 // whPrefix is the whiteout prefix, which is used to signify "special" files in 285 // an OCI image layer archive. An expanded filesystem image cannot contain 286 // files that have a basename starting with this prefix. 287 const whPrefix = ".wh." 288 289 // whOpaque is the *full* basename of a special file which indicates that all 290 // siblings in a directory are to be dropped in the "lower" layer. 291 const whOpaque = whPrefix + whPrefix + ".opq" 292 293 // addWhiteout adds a whiteout file for the given name inside the tar archive. 
294 // It's not recommended to add a file with AddFile and then white it out. If 295 // you specify opaque, then the whiteout created is an opaque whiteout *for the 296 // directory path* given. 297 func (tg *tarGenerator) addWhiteout(name string, opaque bool) error { 298 name, err := normalise(name, false) 299 if err != nil { 300 return errors.Wrap(err, "normalise path") 301 } 302 303 // Disallow having a whiteout of a whiteout, purely for our own sanity. 304 dir, file := filepath.Split(name) 305 if strings.HasPrefix(file, whPrefix) { 306 return errors.Errorf("invalid path has whiteout prefix %q: %s", whPrefix, name) 307 } 308 309 // Figure out the whiteout name. 310 whiteout := filepath.Join(dir, whPrefix+file) 311 if opaque { 312 whiteout = filepath.Join(name, whOpaque) 313 } 314 315 // Add a dummy header for the whiteout file. 316 return errors.Wrap(tg.tw.WriteHeader(&tar.Header{ 317 Name: whiteout, 318 Size: 0, 319 }), "write whiteout header") 320 } 321 322 // AddWhiteout creates a whiteout for the provided path. 323 func (tg *tarGenerator) AddWhiteout(name string) error { 324 return tg.addWhiteout(name, false) 325 } 326 327 // AddOpaqueWhiteout creates a whiteout for the provided path. 328 func (tg *tarGenerator) AddOpaqueWhiteout(name string) error { 329 return tg.addWhiteout(name, true) 330 }