github.com/moby/docker@v26.1.3+incompatible/pkg/archive/changes.go (about) 1 package archive // import "github.com/docker/docker/pkg/archive" 2 3 import ( 4 "archive/tar" 5 "bytes" 6 "context" 7 "fmt" 8 "io" 9 "os" 10 "path/filepath" 11 "sort" 12 "strings" 13 "syscall" 14 "time" 15 16 "github.com/containerd/log" 17 "github.com/docker/docker/pkg/idtools" 18 "github.com/docker/docker/pkg/pools" 19 "github.com/docker/docker/pkg/system" 20 ) 21 22 // ChangeType represents the change type. 23 type ChangeType int 24 25 const ( 26 // ChangeModify represents the modify operation. 27 ChangeModify = iota 28 // ChangeAdd represents the add operation. 29 ChangeAdd 30 // ChangeDelete represents the delete operation. 31 ChangeDelete 32 ) 33 34 func (c ChangeType) String() string { 35 switch c { 36 case ChangeModify: 37 return "C" 38 case ChangeAdd: 39 return "A" 40 case ChangeDelete: 41 return "D" 42 } 43 return "" 44 } 45 46 // Change represents a change, it wraps the change type and path. 47 // It describes changes of the files in the path respect to the 48 // parent layers. The change could be modify, add, delete. 49 // This is used for layer diff. 50 type Change struct { 51 Path string 52 Kind ChangeType 53 } 54 55 func (change *Change) String() string { 56 return fmt.Sprintf("%s %s", change.Kind, change.Path) 57 } 58 59 // for sort.Sort 60 type changesByPath []Change 61 62 func (c changesByPath) Less(i, j int) bool { return c[i].Path < c[j].Path } 63 func (c changesByPath) Len() int { return len(c) } 64 func (c changesByPath) Swap(i, j int) { c[j], c[i] = c[i], c[j] } 65 66 // Gnu tar doesn't have sub-second mtime precision. The go tar 67 // writer (1.10+) does when using PAX format, but we round times to seconds 68 // to ensure archives have the same hashes for backwards compatibility. 69 // See https://github.com/moby/moby/pull/35739/commits/fb170206ba12752214630b269a40ac7be6115ed4. 70 // 71 // Non-sub-second is problematic when we apply changes via tar 72 // files. We handle this by comparing for exact times, *or* same 73 // second count and either a or b having exactly 0 nanoseconds 74 func sameFsTime(a, b time.Time) bool { 75 return a.Equal(b) || 76 (a.Unix() == b.Unix() && 77 (a.Nanosecond() == 0 || b.Nanosecond() == 0)) 78 } 79 80 func sameFsTimeSpec(a, b syscall.Timespec) bool { 81 return a.Sec == b.Sec && 82 (a.Nsec == b.Nsec || a.Nsec == 0 || b.Nsec == 0) 83 } 84 85 // Changes walks the path rw and determines changes for the files in the path, 86 // with respect to the parent layers 87 func Changes(layers []string, rw string) ([]Change, error) { 88 return changes(layers, rw, aufsDeletedFile, aufsMetadataSkip) 89 } 90 91 func aufsMetadataSkip(path string) (skip bool, err error) { 92 skip, err = filepath.Match(string(os.PathSeparator)+WhiteoutMetaPrefix+"*", path) 93 if err != nil { 94 skip = true 95 } 96 return 97 } 98 99 func aufsDeletedFile(root, path string, fi os.FileInfo) (string, error) { 100 f := filepath.Base(path) 101 102 // If there is a whiteout, then the file was removed 103 if strings.HasPrefix(f, WhiteoutPrefix) { 104 originalFile := f[len(WhiteoutPrefix):] 105 return filepath.Join(filepath.Dir(path), originalFile), nil 106 } 107 108 return "", nil 109 } 110 111 type ( 112 skipChange func(string) (bool, error) 113 deleteChange func(string, string, os.FileInfo) (string, error) 114 ) 115 116 func changes(layers []string, rw string, dc deleteChange, sc skipChange) ([]Change, error) { 117 var ( 118 changes []Change 119 changedDirs = make(map[string]struct{}) 120 ) 121 122 err := filepath.Walk(rw, func(path string, f os.FileInfo, err error) error { 123 if err != nil { 124 return err 125 } 126 127 // Rebase path 128 path, err = filepath.Rel(rw, path) 129 if err != nil { 130 return err 131 } 132 133 // As this runs on the daemon side, file paths are OS specific. 134 path = filepath.Join(string(os.PathSeparator), path) 135 136 // Skip root 137 if path == string(os.PathSeparator) { 138 return nil 139 } 140 141 if sc != nil { 142 if skip, err := sc(path); skip { 143 return err 144 } 145 } 146 147 change := Change{ 148 Path: path, 149 } 150 151 deletedFile, err := dc(rw, path, f) 152 if err != nil { 153 return err 154 } 155 156 // Find out what kind of modification happened 157 if deletedFile != "" { 158 change.Path = deletedFile 159 change.Kind = ChangeDelete 160 } else { 161 // Otherwise, the file was added 162 change.Kind = ChangeAdd 163 164 // ...Unless it already existed in a top layer, in which case, it's a modification 165 for _, layer := range layers { 166 stat, err := os.Stat(filepath.Join(layer, path)) 167 if err != nil && !os.IsNotExist(err) { 168 return err 169 } 170 if err == nil { 171 // The file existed in the top layer, so that's a modification 172 173 // However, if it's a directory, maybe it wasn't actually modified. 174 // If you modify /foo/bar/baz, then /foo will be part of the changed files only because it's the parent of bar 175 if stat.IsDir() && f.IsDir() { 176 if f.Size() == stat.Size() && f.Mode() == stat.Mode() && sameFsTime(f.ModTime(), stat.ModTime()) { 177 // Both directories are the same, don't record the change 178 return nil 179 } 180 } 181 change.Kind = ChangeModify 182 break 183 } 184 } 185 } 186 187 // If /foo/bar/file.txt is modified, then /foo/bar must be part of the changed files. 188 // This block is here to ensure the change is recorded even if the 189 // modify time, mode and size of the parent directory in the rw and ro layers are all equal. 190 // Check https://github.com/docker/docker/pull/13590 for details. 191 if f.IsDir() { 192 changedDirs[path] = struct{}{} 193 } 194 if change.Kind == ChangeAdd || change.Kind == ChangeDelete { 195 parent := filepath.Dir(path) 196 if _, ok := changedDirs[parent]; !ok && parent != "/" { 197 changes = append(changes, Change{Path: parent, Kind: ChangeModify}) 198 changedDirs[parent] = struct{}{} 199 } 200 } 201 202 // Record change 203 changes = append(changes, change) 204 return nil 205 }) 206 if err != nil && !os.IsNotExist(err) { 207 return nil, err 208 } 209 return changes, nil 210 } 211 212 // FileInfo describes the information of a file. 213 type FileInfo struct { 214 parent *FileInfo 215 name string 216 stat *system.StatT 217 children map[string]*FileInfo 218 capability []byte 219 added bool 220 } 221 222 // LookUp looks up the file information of a file. 223 func (info *FileInfo) LookUp(path string) *FileInfo { 224 // As this runs on the daemon side, file paths are OS specific. 225 parent := info 226 if path == string(os.PathSeparator) { 227 return info 228 } 229 230 pathElements := strings.Split(path, string(os.PathSeparator)) 231 for _, elem := range pathElements { 232 if elem != "" { 233 child := parent.children[elem] 234 if child == nil { 235 return nil 236 } 237 parent = child 238 } 239 } 240 return parent 241 } 242 243 func (info *FileInfo) path() string { 244 if info.parent == nil { 245 // As this runs on the daemon side, file paths are OS specific. 246 return string(os.PathSeparator) 247 } 248 return filepath.Join(info.parent.path(), info.name) 249 } 250 251 func (info *FileInfo) addChanges(oldInfo *FileInfo, changes *[]Change) { 252 sizeAtEntry := len(*changes) 253 254 if oldInfo == nil { 255 // add 256 change := Change{ 257 Path: info.path(), 258 Kind: ChangeAdd, 259 } 260 *changes = append(*changes, change) 261 info.added = true 262 } 263 264 // We make a copy so we can modify it to detect additions 265 // also, we only recurse on the old dir if the new info is a directory 266 // otherwise any previous delete/change is considered recursive 267 oldChildren := make(map[string]*FileInfo) 268 if oldInfo != nil && info.isDir() { 269 for k, v := range oldInfo.children { 270 oldChildren[k] = v 271 } 272 } 273 274 for name, newChild := range info.children { 275 oldChild := oldChildren[name] 276 if oldChild != nil { 277 // change? 278 oldStat := oldChild.stat 279 newStat := newChild.stat 280 // Note: We can't compare inode or ctime or blocksize here, because these change 281 // when copying a file into a container. However, that is not generally a problem 282 // because any content change will change mtime, and any status change should 283 // be visible when actually comparing the stat fields. The only time this 284 // breaks down is if some code intentionally hides a change by setting 285 // back mtime 286 if statDifferent(oldStat, newStat) || 287 !bytes.Equal(oldChild.capability, newChild.capability) { 288 change := Change{ 289 Path: newChild.path(), 290 Kind: ChangeModify, 291 } 292 *changes = append(*changes, change) 293 newChild.added = true 294 } 295 296 // Remove from copy so we can detect deletions 297 delete(oldChildren, name) 298 } 299 300 newChild.addChanges(oldChild, changes) 301 } 302 for _, oldChild := range oldChildren { 303 // delete 304 change := Change{ 305 Path: oldChild.path(), 306 Kind: ChangeDelete, 307 } 308 *changes = append(*changes, change) 309 } 310 311 // If there were changes inside this directory, we need to add it, even if the directory 312 // itself wasn't changed. This is needed to properly save and restore filesystem permissions. 313 // As this runs on the daemon side, file paths are OS specific. 314 if len(*changes) > sizeAtEntry && info.isDir() && !info.added && info.path() != string(os.PathSeparator) { 315 change := Change{ 316 Path: info.path(), 317 Kind: ChangeModify, 318 } 319 // Let's insert the directory entry before the recently added entries located inside this dir 320 *changes = append(*changes, change) // just to resize the slice, will be overwritten 321 copy((*changes)[sizeAtEntry+1:], (*changes)[sizeAtEntry:]) 322 (*changes)[sizeAtEntry] = change 323 } 324 } 325 326 // Changes add changes to file information. 327 func (info *FileInfo) Changes(oldInfo *FileInfo) []Change { 328 var changes []Change 329 330 info.addChanges(oldInfo, &changes) 331 332 return changes 333 } 334 335 func newRootFileInfo() *FileInfo { 336 // As this runs on the daemon side, file paths are OS specific. 337 root := &FileInfo{ 338 name: string(os.PathSeparator), 339 children: make(map[string]*FileInfo), 340 } 341 return root 342 } 343 344 // ChangesDirs compares two directories and generates an array of Change objects describing the changes. 345 // If oldDir is "", then all files in newDir will be Add-Changes. 346 func ChangesDirs(newDir, oldDir string) ([]Change, error) { 347 var oldRoot, newRoot *FileInfo 348 if oldDir == "" { 349 emptyDir, err := os.MkdirTemp("", "empty") 350 if err != nil { 351 return nil, err 352 } 353 defer os.Remove(emptyDir) 354 oldDir = emptyDir 355 } 356 oldRoot, newRoot, err := collectFileInfoForChanges(oldDir, newDir) 357 if err != nil { 358 return nil, err 359 } 360 361 return newRoot.Changes(oldRoot), nil 362 } 363 364 // ChangesSize calculates the size in bytes of the provided changes, based on newDir. 365 func ChangesSize(newDir string, changes []Change) int64 { 366 var ( 367 size int64 368 sf = make(map[uint64]struct{}) 369 ) 370 for _, change := range changes { 371 if change.Kind == ChangeModify || change.Kind == ChangeAdd { 372 file := filepath.Join(newDir, change.Path) 373 fileInfo, err := os.Lstat(file) 374 if err != nil { 375 log.G(context.TODO()).Errorf("Can not stat %q: %s", file, err) 376 continue 377 } 378 379 if fileInfo != nil && !fileInfo.IsDir() { 380 if hasHardlinks(fileInfo) { 381 inode := getIno(fileInfo) 382 if _, ok := sf[inode]; !ok { 383 size += fileInfo.Size() 384 sf[inode] = struct{}{} 385 } 386 } else { 387 size += fileInfo.Size() 388 } 389 } 390 } 391 } 392 return size 393 } 394 395 // ExportChanges produces an Archive from the provided changes, relative to dir. 396 func ExportChanges(dir string, changes []Change, idMap idtools.IdentityMapping) (io.ReadCloser, error) { 397 reader, writer := io.Pipe() 398 go func() { 399 ta := newTarAppender(idMap, writer, nil) 400 401 // this buffer is needed for the duration of this piped stream 402 defer pools.BufioWriter32KPool.Put(ta.Buffer) 403 404 sort.Sort(changesByPath(changes)) 405 406 // In general we log errors here but ignore them because 407 // during e.g. a diff operation the container can continue 408 // mutating the filesystem and we can see transient errors 409 // from this 410 for _, change := range changes { 411 if change.Kind == ChangeDelete { 412 whiteOutDir := filepath.Dir(change.Path) 413 whiteOutBase := filepath.Base(change.Path) 414 whiteOut := filepath.Join(whiteOutDir, WhiteoutPrefix+whiteOutBase) 415 timestamp := time.Now() 416 hdr := &tar.Header{ 417 Name: whiteOut[1:], 418 Size: 0, 419 ModTime: timestamp, 420 AccessTime: timestamp, 421 ChangeTime: timestamp, 422 } 423 if err := ta.TarWriter.WriteHeader(hdr); err != nil { 424 log.G(context.TODO()).Debugf("Can't write whiteout header: %s", err) 425 } 426 } else { 427 path := filepath.Join(dir, change.Path) 428 if err := ta.addTarFile(path, change.Path[1:]); err != nil { 429 log.G(context.TODO()).Debugf("Can't add file %s to tar: %s", path, err) 430 } 431 } 432 } 433 434 // Make sure to check the error on Close. 435 if err := ta.TarWriter.Close(); err != nil { 436 log.G(context.TODO()).Debugf("Can't close layer: %s", err) 437 } 438 if err := writer.Close(); err != nil { 439 log.G(context.TODO()).Debugf("failed close Changes writer: %s", err) 440 } 441 }() 442 return reader, nil 443 }