github.com/Heebron/moby@v0.0.0-20221111184709-6eab4f55faf7/pkg/archive/changes.go (about)

     1  package archive // import "github.com/docker/docker/pkg/archive"
     2  
     3  import (
     4  	"archive/tar"
     5  	"bytes"
     6  	"fmt"
     7  	"io"
     8  	"os"
     9  	"path/filepath"
    10  	"sort"
    11  	"strings"
    12  	"syscall"
    13  	"time"
    14  
    15  	"github.com/docker/docker/pkg/idtools"
    16  	"github.com/docker/docker/pkg/pools"
    17  	"github.com/docker/docker/pkg/system"
    18  	"github.com/sirupsen/logrus"
    19  )
    20  
    21  // ChangeType represents the change type.
    22  type ChangeType int
    23  
    24  const (
    25  	// ChangeModify represents the modify operation.
    26  	ChangeModify = iota
    27  	// ChangeAdd represents the add operation.
    28  	ChangeAdd
    29  	// ChangeDelete represents the delete operation.
    30  	ChangeDelete
    31  )
    32  
    33  func (c ChangeType) String() string {
    34  	switch c {
    35  	case ChangeModify:
    36  		return "C"
    37  	case ChangeAdd:
    38  		return "A"
    39  	case ChangeDelete:
    40  		return "D"
    41  	}
    42  	return ""
    43  }
    44  
    45  // Change represents a change, it wraps the change type and path.
    46  // It describes changes of the files in the path respect to the
    47  // parent layers. The change could be modify, add, delete.
    48  // This is used for layer diff.
    49  type Change struct {
    50  	Path string
    51  	Kind ChangeType
    52  }
    53  
    54  func (change *Change) String() string {
    55  	return fmt.Sprintf("%s %s", change.Kind, change.Path)
    56  }
    57  
    58  // for sort.Sort
    59  type changesByPath []Change
    60  
    61  func (c changesByPath) Less(i, j int) bool { return c[i].Path < c[j].Path }
    62  func (c changesByPath) Len() int           { return len(c) }
    63  func (c changesByPath) Swap(i, j int)      { c[j], c[i] = c[i], c[j] }
    64  
    65  // Gnu tar doesn't have sub-second mtime precision. The go tar
    66  // writer (1.10+) does when using PAX format, but we round times to seconds
    67  // to ensure archives have the same hashes for backwards compatibility.
    68  // See https://github.com/moby/moby/pull/35739/commits/fb170206ba12752214630b269a40ac7be6115ed4.
    69  //
    70  // Non-sub-second is problematic when we apply changes via tar
    71  // files. We handle this by comparing for exact times, *or* same
    72  // second count and either a or b having exactly 0 nanoseconds
    73  func sameFsTime(a, b time.Time) bool {
    74  	return a.Equal(b) ||
    75  		(a.Unix() == b.Unix() &&
    76  			(a.Nanosecond() == 0 || b.Nanosecond() == 0))
    77  }
    78  
    79  func sameFsTimeSpec(a, b syscall.Timespec) bool {
    80  	return a.Sec == b.Sec &&
    81  		(a.Nsec == b.Nsec || a.Nsec == 0 || b.Nsec == 0)
    82  }
    83  
    84  // Changes walks the path rw and determines changes for the files in the path,
    85  // with respect to the parent layers
    86  func Changes(layers []string, rw string) ([]Change, error) {
    87  	return changes(layers, rw, aufsDeletedFile, aufsMetadataSkip)
    88  }
    89  
    90  func aufsMetadataSkip(path string) (skip bool, err error) {
    91  	skip, err = filepath.Match(string(os.PathSeparator)+WhiteoutMetaPrefix+"*", path)
    92  	if err != nil {
    93  		skip = true
    94  	}
    95  	return
    96  }
    97  
    98  func aufsDeletedFile(root, path string, fi os.FileInfo) (string, error) {
    99  	f := filepath.Base(path)
   100  
   101  	// If there is a whiteout, then the file was removed
   102  	if strings.HasPrefix(f, WhiteoutPrefix) {
   103  		originalFile := f[len(WhiteoutPrefix):]
   104  		return filepath.Join(filepath.Dir(path), originalFile), nil
   105  	}
   106  
   107  	return "", nil
   108  }
   109  
   110  type skipChange func(string) (bool, error)
   111  type deleteChange func(string, string, os.FileInfo) (string, error)
   112  
   113  func changes(layers []string, rw string, dc deleteChange, sc skipChange) ([]Change, error) {
   114  	var (
   115  		changes     []Change
   116  		changedDirs = make(map[string]struct{})
   117  	)
   118  
   119  	err := filepath.Walk(rw, func(path string, f os.FileInfo, err error) error {
   120  		if err != nil {
   121  			return err
   122  		}
   123  
   124  		// Rebase path
   125  		path, err = filepath.Rel(rw, path)
   126  		if err != nil {
   127  			return err
   128  		}
   129  
   130  		// As this runs on the daemon side, file paths are OS specific.
   131  		path = filepath.Join(string(os.PathSeparator), path)
   132  
   133  		// Skip root
   134  		if path == string(os.PathSeparator) {
   135  			return nil
   136  		}
   137  
   138  		if sc != nil {
   139  			if skip, err := sc(path); skip {
   140  				return err
   141  			}
   142  		}
   143  
   144  		change := Change{
   145  			Path: path,
   146  		}
   147  
   148  		deletedFile, err := dc(rw, path, f)
   149  		if err != nil {
   150  			return err
   151  		}
   152  
   153  		// Find out what kind of modification happened
   154  		if deletedFile != "" {
   155  			change.Path = deletedFile
   156  			change.Kind = ChangeDelete
   157  		} else {
   158  			// Otherwise, the file was added
   159  			change.Kind = ChangeAdd
   160  
   161  			// ...Unless it already existed in a top layer, in which case, it's a modification
   162  			for _, layer := range layers {
   163  				stat, err := os.Stat(filepath.Join(layer, path))
   164  				if err != nil && !os.IsNotExist(err) {
   165  					return err
   166  				}
   167  				if err == nil {
   168  					// The file existed in the top layer, so that's a modification
   169  
   170  					// However, if it's a directory, maybe it wasn't actually modified.
   171  					// If you modify /foo/bar/baz, then /foo will be part of the changed files only because it's the parent of bar
   172  					if stat.IsDir() && f.IsDir() {
   173  						if f.Size() == stat.Size() && f.Mode() == stat.Mode() && sameFsTime(f.ModTime(), stat.ModTime()) {
   174  							// Both directories are the same, don't record the change
   175  							return nil
   176  						}
   177  					}
   178  					change.Kind = ChangeModify
   179  					break
   180  				}
   181  			}
   182  		}
   183  
   184  		// If /foo/bar/file.txt is modified, then /foo/bar must be part of the changed files.
   185  		// This block is here to ensure the change is recorded even if the
   186  		// modify time, mode and size of the parent directory in the rw and ro layers are all equal.
   187  		// Check https://github.com/docker/docker/pull/13590 for details.
   188  		if f.IsDir() {
   189  			changedDirs[path] = struct{}{}
   190  		}
   191  		if change.Kind == ChangeAdd || change.Kind == ChangeDelete {
   192  			parent := filepath.Dir(path)
   193  			if _, ok := changedDirs[parent]; !ok && parent != "/" {
   194  				changes = append(changes, Change{Path: parent, Kind: ChangeModify})
   195  				changedDirs[parent] = struct{}{}
   196  			}
   197  		}
   198  
   199  		// Record change
   200  		changes = append(changes, change)
   201  		return nil
   202  	})
   203  	if err != nil && !os.IsNotExist(err) {
   204  		return nil, err
   205  	}
   206  	return changes, nil
   207  }
   208  
// FileInfo describes the information of a file.
// Instances form a tree: each node refers to its parent and to its
// children by name.
type FileInfo struct {
	// parent is the enclosing directory's node; nil marks the root.
	parent     *FileInfo
	// name is this entry's own path element, joined onto the parent's path.
	name       string
	// stat holds the file metadata compared when looking for modifications.
	stat       *system.StatT
	// children maps a path element to the node of the entry with that name.
	children   map[string]*FileInfo
	// capability is compared byte-for-byte between old and new entries.
	// NOTE(review): presumably the file's capability xattr — confirm where it is filled in.
	capability []byte
	// added is set once an add or modify change was recorded for this entry,
	// so that no extra directory-modified change is emitted for it.
	added      bool
}
   218  
   219  // LookUp looks up the file information of a file.
   220  func (info *FileInfo) LookUp(path string) *FileInfo {
   221  	// As this runs on the daemon side, file paths are OS specific.
   222  	parent := info
   223  	if path == string(os.PathSeparator) {
   224  		return info
   225  	}
   226  
   227  	pathElements := strings.Split(path, string(os.PathSeparator))
   228  	for _, elem := range pathElements {
   229  		if elem != "" {
   230  			child := parent.children[elem]
   231  			if child == nil {
   232  				return nil
   233  			}
   234  			parent = child
   235  		}
   236  	}
   237  	return parent
   238  }
   239  
   240  func (info *FileInfo) path() string {
   241  	if info.parent == nil {
   242  		// As this runs on the daemon side, file paths are OS specific.
   243  		return string(os.PathSeparator)
   244  	}
   245  	return filepath.Join(info.parent.path(), info.name)
   246  }
   247  
   248  func (info *FileInfo) addChanges(oldInfo *FileInfo, changes *[]Change) {
   249  	sizeAtEntry := len(*changes)
   250  
   251  	if oldInfo == nil {
   252  		// add
   253  		change := Change{
   254  			Path: info.path(),
   255  			Kind: ChangeAdd,
   256  		}
   257  		*changes = append(*changes, change)
   258  		info.added = true
   259  	}
   260  
   261  	// We make a copy so we can modify it to detect additions
   262  	// also, we only recurse on the old dir if the new info is a directory
   263  	// otherwise any previous delete/change is considered recursive
   264  	oldChildren := make(map[string]*FileInfo)
   265  	if oldInfo != nil && info.isDir() {
   266  		for k, v := range oldInfo.children {
   267  			oldChildren[k] = v
   268  		}
   269  	}
   270  
   271  	for name, newChild := range info.children {
   272  		oldChild := oldChildren[name]
   273  		if oldChild != nil {
   274  			// change?
   275  			oldStat := oldChild.stat
   276  			newStat := newChild.stat
   277  			// Note: We can't compare inode or ctime or blocksize here, because these change
   278  			// when copying a file into a container. However, that is not generally a problem
   279  			// because any content change will change mtime, and any status change should
   280  			// be visible when actually comparing the stat fields. The only time this
   281  			// breaks down is if some code intentionally hides a change by setting
   282  			// back mtime
   283  			if statDifferent(oldStat, newStat) ||
   284  				!bytes.Equal(oldChild.capability, newChild.capability) {
   285  				change := Change{
   286  					Path: newChild.path(),
   287  					Kind: ChangeModify,
   288  				}
   289  				*changes = append(*changes, change)
   290  				newChild.added = true
   291  			}
   292  
   293  			// Remove from copy so we can detect deletions
   294  			delete(oldChildren, name)
   295  		}
   296  
   297  		newChild.addChanges(oldChild, changes)
   298  	}
   299  	for _, oldChild := range oldChildren {
   300  		// delete
   301  		change := Change{
   302  			Path: oldChild.path(),
   303  			Kind: ChangeDelete,
   304  		}
   305  		*changes = append(*changes, change)
   306  	}
   307  
   308  	// If there were changes inside this directory, we need to add it, even if the directory
   309  	// itself wasn't changed. This is needed to properly save and restore filesystem permissions.
   310  	// As this runs on the daemon side, file paths are OS specific.
   311  	if len(*changes) > sizeAtEntry && info.isDir() && !info.added && info.path() != string(os.PathSeparator) {
   312  		change := Change{
   313  			Path: info.path(),
   314  			Kind: ChangeModify,
   315  		}
   316  		// Let's insert the directory entry before the recently added entries located inside this dir
   317  		*changes = append(*changes, change) // just to resize the slice, will be overwritten
   318  		copy((*changes)[sizeAtEntry+1:], (*changes)[sizeAtEntry:])
   319  		(*changes)[sizeAtEntry] = change
   320  	}
   321  }
   322  
   323  // Changes add changes to file information.
   324  func (info *FileInfo) Changes(oldInfo *FileInfo) []Change {
   325  	var changes []Change
   326  
   327  	info.addChanges(oldInfo, &changes)
   328  
   329  	return changes
   330  }
   331  
   332  func newRootFileInfo() *FileInfo {
   333  	// As this runs on the daemon side, file paths are OS specific.
   334  	root := &FileInfo{
   335  		name:     string(os.PathSeparator),
   336  		children: make(map[string]*FileInfo),
   337  	}
   338  	return root
   339  }
   340  
   341  // ChangesDirs compares two directories and generates an array of Change objects describing the changes.
   342  // If oldDir is "", then all files in newDir will be Add-Changes.
   343  func ChangesDirs(newDir, oldDir string) ([]Change, error) {
   344  	var (
   345  		oldRoot, newRoot *FileInfo
   346  	)
   347  	if oldDir == "" {
   348  		emptyDir, err := os.MkdirTemp("", "empty")
   349  		if err != nil {
   350  			return nil, err
   351  		}
   352  		defer os.Remove(emptyDir)
   353  		oldDir = emptyDir
   354  	}
   355  	oldRoot, newRoot, err := collectFileInfoForChanges(oldDir, newDir)
   356  	if err != nil {
   357  		return nil, err
   358  	}
   359  
   360  	return newRoot.Changes(oldRoot), nil
   361  }
   362  
   363  // ChangesSize calculates the size in bytes of the provided changes, based on newDir.
   364  func ChangesSize(newDir string, changes []Change) int64 {
   365  	var (
   366  		size int64
   367  		sf   = make(map[uint64]struct{})
   368  	)
   369  	for _, change := range changes {
   370  		if change.Kind == ChangeModify || change.Kind == ChangeAdd {
   371  			file := filepath.Join(newDir, change.Path)
   372  			fileInfo, err := os.Lstat(file)
   373  			if err != nil {
   374  				logrus.Errorf("Can not stat %q: %s", file, err)
   375  				continue
   376  			}
   377  
   378  			if fileInfo != nil && !fileInfo.IsDir() {
   379  				if hasHardlinks(fileInfo) {
   380  					inode := getIno(fileInfo)
   381  					if _, ok := sf[inode]; !ok {
   382  						size += fileInfo.Size()
   383  						sf[inode] = struct{}{}
   384  					}
   385  				} else {
   386  					size += fileInfo.Size()
   387  				}
   388  			}
   389  		}
   390  	}
   391  	return size
   392  }
   393  
   394  // ExportChanges produces an Archive from the provided changes, relative to dir.
   395  func ExportChanges(dir string, changes []Change, idMap idtools.IdentityMapping) (io.ReadCloser, error) {
   396  	reader, writer := io.Pipe()
   397  	go func() {
   398  		ta := newTarAppender(idMap, writer, nil)
   399  
   400  		// this buffer is needed for the duration of this piped stream
   401  		defer pools.BufioWriter32KPool.Put(ta.Buffer)
   402  
   403  		sort.Sort(changesByPath(changes))
   404  
   405  		// In general we log errors here but ignore them because
   406  		// during e.g. a diff operation the container can continue
   407  		// mutating the filesystem and we can see transient errors
   408  		// from this
   409  		for _, change := range changes {
   410  			if change.Kind == ChangeDelete {
   411  				whiteOutDir := filepath.Dir(change.Path)
   412  				whiteOutBase := filepath.Base(change.Path)
   413  				whiteOut := filepath.Join(whiteOutDir, WhiteoutPrefix+whiteOutBase)
   414  				timestamp := time.Now()
   415  				hdr := &tar.Header{
   416  					Name:       whiteOut[1:],
   417  					Size:       0,
   418  					ModTime:    timestamp,
   419  					AccessTime: timestamp,
   420  					ChangeTime: timestamp,
   421  				}
   422  				if err := ta.TarWriter.WriteHeader(hdr); err != nil {
   423  					logrus.Debugf("Can't write whiteout header: %s", err)
   424  				}
   425  			} else {
   426  				path := filepath.Join(dir, change.Path)
   427  				if err := ta.addTarFile(path, change.Path[1:]); err != nil {
   428  					logrus.Debugf("Can't add file %s to tar: %s", path, err)
   429  				}
   430  			}
   431  		}
   432  
   433  		// Make sure to check the error on Close.
   434  		if err := ta.TarWriter.Close(); err != nil {
   435  			logrus.Debugf("Can't close layer: %s", err)
   436  		}
   437  		if err := writer.Close(); err != nil {
   438  			logrus.Debugf("failed close Changes writer: %s", err)
   439  		}
   440  	}()
   441  	return reader, nil
   442  }