github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/engine/pkg/archive/changes.go (about) 1 package archive // import "github.com/docker/docker/pkg/archive" 2 3 import ( 4 "archive/tar" 5 "bytes" 6 "fmt" 7 "io" 8 "os" 9 "path/filepath" 10 "sort" 11 "strings" 12 "syscall" 13 "time" 14 15 "github.com/docker/docker/pkg/idtools" 16 "github.com/docker/docker/pkg/pools" 17 "github.com/docker/docker/pkg/system" 18 "github.com/sirupsen/logrus" 19 ) 20 21 // ChangeType represents the change type. 22 type ChangeType int 23 24 const ( 25 // ChangeModify represents the modify operation. 26 ChangeModify = iota 27 // ChangeAdd represents the add operation. 28 ChangeAdd 29 // ChangeDelete represents the delete operation. 30 ChangeDelete 31 ) 32 33 func (c ChangeType) String() string { 34 switch c { 35 case ChangeModify: 36 return "C" 37 case ChangeAdd: 38 return "A" 39 case ChangeDelete: 40 return "D" 41 } 42 return "" 43 } 44 45 // Change represents a change, it wraps the change type and path. 46 // It describes changes of the files in the path respect to the 47 // parent layers. The change could be modify, add, delete. 48 // This is used for layer diff. 49 type Change struct { 50 Path string 51 Kind ChangeType 52 } 53 54 func (change *Change) String() string { 55 return fmt.Sprintf("%s %s", change.Kind, change.Path) 56 } 57 58 // for sort.Sort 59 type changesByPath []Change 60 61 func (c changesByPath) Less(i, j int) bool { return c[i].Path < c[j].Path } 62 func (c changesByPath) Len() int { return len(c) } 63 func (c changesByPath) Swap(i, j int) { c[j], c[i] = c[i], c[j] } 64 65 // Gnu tar doesn't have sub-second mtime precision. The go tar 66 // writer (1.10+) does when using PAX format, but we round times to seconds 67 // to ensure archives have the same hashes for backwards compatibility. 68 // See https://github.com/moby/moby/pull/35739/commits/fb170206ba12752214630b269a40ac7be6115ed4. 69 // 70 // Non-sub-second is problematic when we apply changes via tar 71 // files. We handle this by comparing for exact times, *or* same 72 // second count and either a or b having exactly 0 nanoseconds 73 func sameFsTime(a, b time.Time) bool { 74 return a.Equal(b) || 75 (a.Unix() == b.Unix() && 76 (a.Nanosecond() == 0 || b.Nanosecond() == 0)) 77 } 78 79 func sameFsTimeSpec(a, b syscall.Timespec) bool { 80 return a.Sec == b.Sec && 81 (a.Nsec == b.Nsec || a.Nsec == 0 || b.Nsec == 0) 82 } 83 84 // Changes walks the path rw and determines changes for the files in the path, 85 // with respect to the parent layers 86 func Changes(layers []string, rw string) ([]Change, error) { 87 return changes(layers, rw, aufsDeletedFile, aufsMetadataSkip) 88 } 89 90 func aufsMetadataSkip(path string) (skip bool, err error) { 91 skip, err = filepath.Match(string(os.PathSeparator)+WhiteoutMetaPrefix+"*", path) 92 if err != nil { 93 skip = true 94 } 95 return 96 } 97 98 func aufsDeletedFile(root, path string, fi os.FileInfo) (string, error) { 99 f := filepath.Base(path) 100 101 // If there is a whiteout, then the file was removed 102 if strings.HasPrefix(f, WhiteoutPrefix) { 103 originalFile := f[len(WhiteoutPrefix):] 104 return filepath.Join(filepath.Dir(path), originalFile), nil 105 } 106 107 return "", nil 108 } 109 110 type skipChange func(string) (bool, error) 111 type deleteChange func(string, string, os.FileInfo) (string, error) 112 113 func changes(layers []string, rw string, dc deleteChange, sc skipChange) ([]Change, error) { 114 var ( 115 changes []Change 116 changedDirs = make(map[string]struct{}) 117 ) 118 119 err := filepath.Walk(rw, func(path string, f os.FileInfo, err error) error { 120 if err != nil { 121 return err 122 } 123 124 // Rebase path 125 path, err = filepath.Rel(rw, path) 126 if err != nil { 127 return err 128 } 129 130 // As this runs on the daemon side, file paths are OS specific. 131 path = filepath.Join(string(os.PathSeparator), path) 132 133 // Skip root 134 if path == string(os.PathSeparator) { 135 return nil 136 } 137 138 if sc != nil { 139 if skip, err := sc(path); skip { 140 return err 141 } 142 } 143 144 change := Change{ 145 Path: path, 146 } 147 148 deletedFile, err := dc(rw, path, f) 149 if err != nil { 150 return err 151 } 152 153 // Find out what kind of modification happened 154 if deletedFile != "" { 155 change.Path = deletedFile 156 change.Kind = ChangeDelete 157 } else { 158 // Otherwise, the file was added 159 change.Kind = ChangeAdd 160 161 // ...Unless it already existed in a top layer, in which case, it's a modification 162 for _, layer := range layers { 163 stat, err := os.Stat(filepath.Join(layer, path)) 164 if err != nil && !os.IsNotExist(err) { 165 return err 166 } 167 if err == nil { 168 // The file existed in the top layer, so that's a modification 169 170 // However, if it's a directory, maybe it wasn't actually modified. 171 // If you modify /foo/bar/baz, then /foo will be part of the changed files only because it's the parent of bar 172 if stat.IsDir() && f.IsDir() { 173 if f.Size() == stat.Size() && f.Mode() == stat.Mode() && sameFsTime(f.ModTime(), stat.ModTime()) { 174 // Both directories are the same, don't record the change 175 return nil 176 } 177 } 178 change.Kind = ChangeModify 179 break 180 } 181 } 182 } 183 184 // If /foo/bar/file.txt is modified, then /foo/bar must be part of the changed files. 185 // This block is here to ensure the change is recorded even if the 186 // modify time, mode and size of the parent directory in the rw and ro layers are all equal. 187 // Check https://github.com/docker/docker/pull/13590 for details. 188 if f.IsDir() { 189 changedDirs[path] = struct{}{} 190 } 191 if change.Kind == ChangeAdd || change.Kind == ChangeDelete { 192 parent := filepath.Dir(path) 193 if _, ok := changedDirs[parent]; !ok && parent != "/" { 194 changes = append(changes, Change{Path: parent, Kind: ChangeModify}) 195 changedDirs[parent] = struct{}{} 196 } 197 } 198 199 // Record change 200 changes = append(changes, change) 201 return nil 202 }) 203 if err != nil && !os.IsNotExist(err) { 204 return nil, err 205 } 206 return changes, nil 207 } 208 209 // FileInfo describes the information of a file. 210 type FileInfo struct { 211 parent *FileInfo 212 name string 213 stat *system.StatT 214 children map[string]*FileInfo 215 capability []byte 216 added bool 217 } 218 219 // LookUp looks up the file information of a file. 220 func (info *FileInfo) LookUp(path string) *FileInfo { 221 // As this runs on the daemon side, file paths are OS specific. 222 parent := info 223 if path == string(os.PathSeparator) { 224 return info 225 } 226 227 pathElements := strings.Split(path, string(os.PathSeparator)) 228 for _, elem := range pathElements { 229 if elem != "" { 230 child := parent.children[elem] 231 if child == nil { 232 return nil 233 } 234 parent = child 235 } 236 } 237 return parent 238 } 239 240 func (info *FileInfo) path() string { 241 if info.parent == nil { 242 // As this runs on the daemon side, file paths are OS specific. 243 return string(os.PathSeparator) 244 } 245 return filepath.Join(info.parent.path(), info.name) 246 } 247 248 func (info *FileInfo) addChanges(oldInfo *FileInfo, changes *[]Change) { 249 250 sizeAtEntry := len(*changes) 251 252 if oldInfo == nil { 253 // add 254 change := Change{ 255 Path: info.path(), 256 Kind: ChangeAdd, 257 } 258 *changes = append(*changes, change) 259 info.added = true 260 } 261 262 // We make a copy so we can modify it to detect additions 263 // also, we only recurse on the old dir if the new info is a directory 264 // otherwise any previous delete/change is considered recursive 265 oldChildren := make(map[string]*FileInfo) 266 if oldInfo != nil && info.isDir() { 267 for k, v := range oldInfo.children { 268 oldChildren[k] = v 269 } 270 } 271 272 for name, newChild := range info.children { 273 oldChild := oldChildren[name] 274 if oldChild != nil { 275 // change? 276 oldStat := oldChild.stat 277 newStat := newChild.stat 278 // Note: We can't compare inode or ctime or blocksize here, because these change 279 // when copying a file into a container. However, that is not generally a problem 280 // because any content change will change mtime, and any status change should 281 // be visible when actually comparing the stat fields. The only time this 282 // breaks down is if some code intentionally hides a change by setting 283 // back mtime 284 if statDifferent(oldStat, newStat) || 285 !bytes.Equal(oldChild.capability, newChild.capability) { 286 change := Change{ 287 Path: newChild.path(), 288 Kind: ChangeModify, 289 } 290 *changes = append(*changes, change) 291 newChild.added = true 292 } 293 294 // Remove from copy so we can detect deletions 295 delete(oldChildren, name) 296 } 297 298 newChild.addChanges(oldChild, changes) 299 } 300 for _, oldChild := range oldChildren { 301 // delete 302 change := Change{ 303 Path: oldChild.path(), 304 Kind: ChangeDelete, 305 } 306 *changes = append(*changes, change) 307 } 308 309 // If there were changes inside this directory, we need to add it, even if the directory 310 // itself wasn't changed. This is needed to properly save and restore filesystem permissions. 311 // As this runs on the daemon side, file paths are OS specific. 312 if len(*changes) > sizeAtEntry && info.isDir() && !info.added && info.path() != string(os.PathSeparator) { 313 change := Change{ 314 Path: info.path(), 315 Kind: ChangeModify, 316 } 317 // Let's insert the directory entry before the recently added entries located inside this dir 318 *changes = append(*changes, change) // just to resize the slice, will be overwritten 319 copy((*changes)[sizeAtEntry+1:], (*changes)[sizeAtEntry:]) 320 (*changes)[sizeAtEntry] = change 321 } 322 323 } 324 325 // Changes add changes to file information. 326 func (info *FileInfo) Changes(oldInfo *FileInfo) []Change { 327 var changes []Change 328 329 info.addChanges(oldInfo, &changes) 330 331 return changes 332 } 333 334 func newRootFileInfo() *FileInfo { 335 // As this runs on the daemon side, file paths are OS specific. 336 root := &FileInfo{ 337 name: string(os.PathSeparator), 338 children: make(map[string]*FileInfo), 339 } 340 return root 341 } 342 343 // ChangesDirs compares two directories and generates an array of Change objects describing the changes. 344 // If oldDir is "", then all files in newDir will be Add-Changes. 345 func ChangesDirs(newDir, oldDir string) ([]Change, error) { 346 var ( 347 oldRoot, newRoot *FileInfo 348 ) 349 if oldDir == "" { 350 emptyDir, err := os.MkdirTemp("", "empty") 351 if err != nil { 352 return nil, err 353 } 354 defer os.Remove(emptyDir) 355 oldDir = emptyDir 356 } 357 oldRoot, newRoot, err := collectFileInfoForChanges(oldDir, newDir) 358 if err != nil { 359 return nil, err 360 } 361 362 return newRoot.Changes(oldRoot), nil 363 } 364 365 // ChangesSize calculates the size in bytes of the provided changes, based on newDir. 366 func ChangesSize(newDir string, changes []Change) int64 { 367 var ( 368 size int64 369 sf = make(map[uint64]struct{}) 370 ) 371 for _, change := range changes { 372 if change.Kind == ChangeModify || change.Kind == ChangeAdd { 373 file := filepath.Join(newDir, change.Path) 374 fileInfo, err := os.Lstat(file) 375 if err != nil { 376 logrus.Errorf("Can not stat %q: %s", file, err) 377 continue 378 } 379 380 if fileInfo != nil && !fileInfo.IsDir() { 381 if hasHardlinks(fileInfo) { 382 inode := getIno(fileInfo) 383 if _, ok := sf[inode]; !ok { 384 size += fileInfo.Size() 385 sf[inode] = struct{}{} 386 } 387 } else { 388 size += fileInfo.Size() 389 } 390 } 391 } 392 } 393 return size 394 } 395 396 // ExportChanges produces an Archive from the provided changes, relative to dir. 397 func ExportChanges(dir string, changes []Change, uidMaps, gidMaps []idtools.IDMap) (io.ReadCloser, error) { 398 reader, writer := io.Pipe() 399 go func() { 400 ta := newTarAppender(idtools.NewIDMappingsFromMaps(uidMaps, gidMaps), writer, nil) 401 402 // this buffer is needed for the duration of this piped stream 403 defer pools.BufioWriter32KPool.Put(ta.Buffer) 404 405 sort.Sort(changesByPath(changes)) 406 407 // In general we log errors here but ignore them because 408 // during e.g. a diff operation the container can continue 409 // mutating the filesystem and we can see transient errors 410 // from this 411 for _, change := range changes { 412 if change.Kind == ChangeDelete { 413 whiteOutDir := filepath.Dir(change.Path) 414 whiteOutBase := filepath.Base(change.Path) 415 whiteOut := filepath.Join(whiteOutDir, WhiteoutPrefix+whiteOutBase) 416 timestamp := time.Now() 417 hdr := &tar.Header{ 418 Name: whiteOut[1:], 419 Size: 0, 420 ModTime: timestamp, 421 AccessTime: timestamp, 422 ChangeTime: timestamp, 423 } 424 if err := ta.TarWriter.WriteHeader(hdr); err != nil { 425 logrus.Debugf("Can't write whiteout header: %s", err) 426 } 427 } else { 428 path := filepath.Join(dir, change.Path) 429 if err := ta.addTarFile(path, change.Path[1:]); err != nil { 430 logrus.Debugf("Can't add file %s to tar: %s", path, err) 431 } 432 } 433 } 434 435 // Make sure to check the error on Close. 436 if err := ta.TarWriter.Close(); err != nil { 437 logrus.Debugf("Can't close layer: %s", err) 438 } 439 if err := writer.Close(); err != nil { 440 logrus.Debugf("failed close Changes writer: %s", err) 441 } 442 }() 443 return reader, nil 444 }