github.com/dctrud/umoci@v0.4.3-0.20191016193643-05a1d37de015/oci/layer/tar_generate.go (about) 1 /* 2 * umoci: Umoci Modifies Open Containers' Images 3 * Copyright (C) 2016, 2017, 2018 SUSE LLC. 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package layer 19 20 import ( 21 "archive/tar" 22 "io" 23 "os" 24 "path/filepath" 25 "strings" 26 27 "github.com/apex/log" 28 "github.com/openSUSE/umoci/pkg/fseval" 29 "github.com/openSUSE/umoci/pkg/testutils" 30 "github.com/pkg/errors" 31 ) 32 33 // ignoreXattrs is a list of xattr names that should be ignored when 34 // creating a new image layer, because they are host-specific and/or would be a 35 // bad idea to unpack. They are also excluded from Lclearxattr when extracting 36 // an archive. 37 // XXX: Maybe we should make this configurable so users can manually blacklist 38 // (or even whitelist) xattrs that they actually want included? Like how 39 // GNU tar's xattr setup works. 40 var ignoreXattrs = map[string]struct{}{ 41 // SELinux doesn't allow you to set SELinux policies generically. They're 42 // also host-specific. So just ignore them during extraction. 43 "security.selinux": {}, 44 45 // NFSv4 ACLs are very system-specific and shouldn't be touched by us, nor 46 // should they be included in images. 47 "system.nfs4_acl": {}, 48 } 49 50 func init() { 51 // For test purposes we add a fake forbidden attribute that an unprivileged 52 // user can easily write to (and thus we can test it). 53 if testutils.IsTestBinary() { 54 ignoreXattrs["user.UMOCI:forbidden_xattr"] = struct{}{} 55 } 56 } 57 58 // tarGenerator is a helper for generating layer diff tars. It should be noted 59 // that when using tarGenerator.Add{Path,Whiteout} it is recommended to do it 60 // in lexicographic order. 61 type tarGenerator struct { 62 tw *tar.Writer 63 64 // mapOptions is the set of mapping options for modifying entries before 65 // they're added to the layer. 66 mapOptions MapOptions 67 68 // Hardlink mapping. 69 inodes map[uint64]string 70 71 // fsEval is an fseval.FsEval used for extraction. 72 fsEval fseval.FsEval 73 74 // XXX: Should we add a saftey check to make sure we don't generate two of 75 // the same path in a tar archive? This is not permitted by the spec. 76 } 77 78 // newTarGenerator creates a new tarGenerator using the provided writer as the 79 // output writer. 80 func newTarGenerator(w io.Writer, opt MapOptions) *tarGenerator { 81 fsEval := fseval.DefaultFsEval 82 if opt.Rootless { 83 fsEval = fseval.RootlessFsEval 84 } 85 86 return &tarGenerator{ 87 tw: tar.NewWriter(w), 88 mapOptions: opt, 89 inodes: map[uint64]string{}, 90 fsEval: fsEval, 91 } 92 } 93 94 // normalise converts the provided pathname to a POSIX-compliant pathname. It also will provide an error if a path looks unsafe. 95 func normalise(rawPath string, isDir bool) (string, error) { 96 // Clean up the path. 97 path := CleanPath(rawPath) 98 99 // Nothing to do. 100 if path == "." { 101 return ".", nil 102 } 103 104 if filepath.IsAbs(path) { 105 path = strings.TrimPrefix(path, "/") 106 } 107 108 // Check that the path is "safe", meaning that it doesn't resolve outside 109 // of the tar archive. While this might seem paranoid, it is a legitimate 110 // concern. 111 if "/"+path != filepath.Join("/", path) { 112 return "", errors.Errorf("escape warning: generated path is outside tar root: %s", rawPath) 113 } 114 115 // With some other tar formats, you needed to have a '/' at the end of a 116 // pathname in order to state that it is a directory. While this is no 117 // longer necessary, some older tooling may assume that. 118 if isDir { 119 path += "/" 120 } 121 122 return path, nil 123 } 124 125 // AddFile adds a file from the filesystem to the tar archive. It copies all of 126 // the relevant stat information about the file, and also attempts to track 127 // hardlinks. This should be functionally equivalent to adding entries with GNU 128 // tar. 129 func (tg *tarGenerator) AddFile(name, path string) error { 130 fi, err := tg.fsEval.Lstat(path) 131 if err != nil { 132 return errors.Wrap(err, "add file lstat") 133 } 134 135 linkname := "" 136 if fi.Mode()&os.ModeSymlink == os.ModeSymlink { 137 if linkname, err = tg.fsEval.Readlink(path); err != nil { 138 return errors.Wrap(err, "add file readlink") 139 } 140 } 141 142 hdr, err := tar.FileInfoHeader(fi, linkname) 143 if err != nil { 144 return errors.Wrap(err, "convert fi to hdr") 145 } 146 hdr.Xattrs = map[string]string{} 147 148 name, err = normalise(name, fi.IsDir()) 149 if err != nil { 150 return errors.Wrap(err, "normalise path") 151 } 152 hdr.Name = name 153 154 // Make sure that we don't include any files with the name ".wh.". This 155 // will almost certainly confuse some users (unfortunately) but there's 156 // nothing we can do to store such files on-disk. 157 if strings.HasPrefix(filepath.Base(name), whPrefix) { 158 return errors.Errorf("invalid path has whiteout prefix %q: %s", whPrefix, name) 159 } 160 161 // FIXME: Do we need to ensure that the parent paths have all been added to 162 // the archive? I haven't found any tar specification that makes 163 // this mandatory, but I have a feeling that some people might rely 164 // on it. The issue with implementing it is that we'd have to get 165 // the FileInfo about the directory from somewhere (and we don't 166 // want to waste space by adding an entry that will be overwritten 167 // later). 168 169 // Different systems have different special things they need to set within 170 // a tar header. For example, device numbers are quite important to be set 171 // by us. 172 statx, err := tg.fsEval.Lstatx(path) 173 if err != nil { 174 return errors.Wrapf(err, "lstatx %q", path) 175 } 176 updateHeader(hdr, statx) 177 178 // Set up xattrs externally to updateHeader because the function signature 179 // would look really dumb otherwise. 180 // XXX: This should probably be moved to a function in tar_unix.go. 181 names, err := tg.fsEval.Llistxattr(path) 182 if err != nil { 183 return errors.Wrap(err, "get xattr list") 184 } 185 for _, name := range names { 186 // Some xattrs need to be skipped for sanity reasons, such as 187 // security.selinux, because they are very much host-specific and 188 // carrying them to other hosts would be a really bad idea. 189 if _, ignore := ignoreXattrs[name]; ignore { 190 continue 191 } 192 // TODO: We should translate all v3 capabilities into root-owned 193 // capabilities here. But we don't have Go code for that yet 194 // (we'd need to use libcap to parse it). 195 value, err := tg.fsEval.Lgetxattr(path, name) 196 if err != nil { 197 // XXX: I'm not sure if we're unprivileged whether Lgetxattr can 198 // fail with EPERM. If it can, we should ignore it (like when 199 // we try to clear xattrs). 200 return errors.Wrapf(err, "get xattr: %s", name) 201 } 202 // https://golang.org/issues/20698 -- We don't just error out here 203 // because it's not _really_ a fatal error. Currently it's unclear 204 // whether the stdlib will correctly handle reading or disable writing 205 // of these PAX headers so we have to track this ourselves. 206 if len(value) <= 0 { 207 log.Warnf("ignoring empty-valued xattr %s: disallowed by PAX standard", name) 208 continue 209 } 210 // Note that Go strings can actually be arbitrary byte sequences, so 211 // this conversion (while it might look a bit wrong) is actually fine. 212 hdr.Xattrs[name] = string(value) 213 } 214 215 // Not all systems have the concept of an inode, but I'm not in the mood to 216 // handle this in a way that makes anything other than GNU/Linux happy 217 // right now. Handle hardlinks. 218 if oldpath, ok := tg.inodes[statx.Ino]; ok { 219 // We just hit a hardlink, so we just have to change the header. 220 hdr.Typeflag = tar.TypeLink 221 hdr.Linkname = oldpath 222 hdr.Size = 0 223 } else { 224 tg.inodes[statx.Ino] = name 225 } 226 227 // Apply any header mappings. 228 if err := mapHeader(hdr, tg.mapOptions); err != nil { 229 return errors.Wrap(err, "map header") 230 } 231 if err := tg.tw.WriteHeader(hdr); err != nil { 232 return errors.Wrap(err, "write header") 233 } 234 235 // Write the contents of regular files. 236 if hdr.Typeflag == tar.TypeReg { 237 fh, err := tg.fsEval.Open(path) 238 if err != nil { 239 return errors.Wrap(err, "open file") 240 } 241 defer fh.Close() 242 243 n, err := io.Copy(tg.tw, fh) 244 if err != nil { 245 return errors.Wrap(err, "copy to layer") 246 } 247 if n != hdr.Size { 248 return errors.Wrap(io.ErrShortWrite, "copy to layer") 249 } 250 } 251 252 return nil 253 } 254 255 // whPrefix is the whiteout prefix, which is used to signify "special" files in 256 // an OCI image layer archive. An expanded filesystem image cannot contain 257 // files that have a basename starting with this prefix. 258 const whPrefix = ".wh." 259 260 // whOpaque is the *full* basename of a special file which indicates that all 261 // siblings in a directory are to be dropped in the "lower" layer. 262 const whOpaque = whPrefix + whPrefix + ".opq" 263 264 // addWhiteout adds a whiteout file for the given name inside the tar archive. 265 // It's not recommended to add a file with AddFile and then white it out. If 266 // you specify opaque, then the whiteout created is an opaque whiteout *for the 267 // directory path* given. 268 func (tg *tarGenerator) addWhiteout(name string, opaque bool) error { 269 name, err := normalise(name, false) 270 if err != nil { 271 return errors.Wrap(err, "normalise path") 272 } 273 274 // Disallow having a whiteout of a whiteout, purely for our own sanity. 275 dir, file := filepath.Split(name) 276 if strings.HasPrefix(file, whPrefix) { 277 return errors.Errorf("invalid path has whiteout prefix %q: %s", whPrefix, name) 278 } 279 280 // Figure out the whiteout name. 281 whiteout := filepath.Join(dir, whPrefix+file) 282 if opaque { 283 whiteout = filepath.Join(name, whOpaque) 284 } 285 286 // Add a dummy header for the whiteout file. 287 return errors.Wrap(tg.tw.WriteHeader(&tar.Header{ 288 Name: whiteout, 289 Size: 0, 290 }), "write whiteout header") 291 } 292 293 // AddWhiteout creates a whiteout for the provided path. 294 func (tg *tarGenerator) AddWhiteout(name string) error { 295 return tg.addWhiteout(name, false) 296 } 297 298 // AddOpaqueWhiteout creates a whiteout for the provided path. 299 func (tg *tarGenerator) AddOpaqueWhiteout(name string) error { 300 return tg.addWhiteout(name, true) 301 }