github.com/demonoid81/moby@v0.0.0-20200517203328-62dd8e17c460/pkg/archive/changes.go (about)

     1  package archive // import "github.com/demonoid81/moby/pkg/archive"
     2  
     3  import (
     4  	"archive/tar"
     5  	"bytes"
     6  	"fmt"
     7  	"io"
     8  	"io/ioutil"
     9  	"os"
    10  	"path/filepath"
    11  	"sort"
    12  	"strings"
    13  	"syscall"
    14  	"time"
    15  
    16  	"github.com/demonoid81/moby/pkg/idtools"
    17  	"github.com/demonoid81/moby/pkg/pools"
    18  	"github.com/demonoid81/moby/pkg/system"
    19  	"github.com/sirupsen/logrus"
    20  )
    21  
    22  // ChangeType represents the change type.
    23  type ChangeType int
    24  
    25  const (
    26  	// ChangeModify represents the modify operation.
    27  	ChangeModify = iota
    28  	// ChangeAdd represents the add operation.
    29  	ChangeAdd
    30  	// ChangeDelete represents the delete operation.
    31  	ChangeDelete
    32  )
    33  
    34  func (c ChangeType) String() string {
    35  	switch c {
    36  	case ChangeModify:
    37  		return "C"
    38  	case ChangeAdd:
    39  		return "A"
    40  	case ChangeDelete:
    41  		return "D"
    42  	}
    43  	return ""
    44  }
    45  
    46  // Change represents a change, it wraps the change type and path.
    47  // It describes changes of the files in the path respect to the
    48  // parent layers. The change could be modify, add, delete.
    49  // This is used for layer diff.
    50  type Change struct {
    51  	Path string
    52  	Kind ChangeType
    53  }
    54  
    55  func (change *Change) String() string {
    56  	return fmt.Sprintf("%s %s", change.Kind, change.Path)
    57  }
    58  
    59  // for sort.Sort
    60  type changesByPath []Change
    61  
    62  func (c changesByPath) Less(i, j int) bool { return c[i].Path < c[j].Path }
    63  func (c changesByPath) Len() int           { return len(c) }
    64  func (c changesByPath) Swap(i, j int)      { c[j], c[i] = c[i], c[j] }
    65  
    66  // Gnu tar doesn't have sub-second mtime precision. The go tar
    67  // writer (1.10+) does when using PAX format, but we round times to seconds
    68  // to ensure archives have the same hashes for backwards compatibility.
    69  // See https://github.com/moby/moby/pull/35739/commits/fb170206ba12752214630b269a40ac7be6115ed4.
    70  //
    71  // Non-sub-second is problematic when we apply changes via tar
    72  // files. We handle this by comparing for exact times, *or* same
    73  // second count and either a or b having exactly 0 nanoseconds
    74  func sameFsTime(a, b time.Time) bool {
    75  	return a.Equal(b) ||
    76  		(a.Unix() == b.Unix() &&
    77  			(a.Nanosecond() == 0 || b.Nanosecond() == 0))
    78  }
    79  
    80  func sameFsTimeSpec(a, b syscall.Timespec) bool {
    81  	return a.Sec == b.Sec &&
    82  		(a.Nsec == b.Nsec || a.Nsec == 0 || b.Nsec == 0)
    83  }
    84  
    85  // Changes walks the path rw and determines changes for the files in the path,
    86  // with respect to the parent layers
    87  func Changes(layers []string, rw string) ([]Change, error) {
    88  	return changes(layers, rw, aufsDeletedFile, aufsMetadataSkip)
    89  }
    90  
    91  func aufsMetadataSkip(path string) (skip bool, err error) {
    92  	skip, err = filepath.Match(string(os.PathSeparator)+WhiteoutMetaPrefix+"*", path)
    93  	if err != nil {
    94  		skip = true
    95  	}
    96  	return
    97  }
    98  
    99  func aufsDeletedFile(root, path string, fi os.FileInfo) (string, error) {
   100  	f := filepath.Base(path)
   101  
   102  	// If there is a whiteout, then the file was removed
   103  	if strings.HasPrefix(f, WhiteoutPrefix) {
   104  		originalFile := f[len(WhiteoutPrefix):]
   105  		return filepath.Join(filepath.Dir(path), originalFile), nil
   106  	}
   107  
   108  	return "", nil
   109  }
   110  
   111  type skipChange func(string) (bool, error)
   112  type deleteChange func(string, string, os.FileInfo) (string, error)
   113  
   114  func changes(layers []string, rw string, dc deleteChange, sc skipChange) ([]Change, error) {
   115  	var (
   116  		changes     []Change
   117  		changedDirs = make(map[string]struct{})
   118  	)
   119  
   120  	err := filepath.Walk(rw, func(path string, f os.FileInfo, err error) error {
   121  		if err != nil {
   122  			return err
   123  		}
   124  
   125  		// Rebase path
   126  		path, err = filepath.Rel(rw, path)
   127  		if err != nil {
   128  			return err
   129  		}
   130  
   131  		// As this runs on the daemon side, file paths are OS specific.
   132  		path = filepath.Join(string(os.PathSeparator), path)
   133  
   134  		// Skip root
   135  		if path == string(os.PathSeparator) {
   136  			return nil
   137  		}
   138  
   139  		if sc != nil {
   140  			if skip, err := sc(path); skip {
   141  				return err
   142  			}
   143  		}
   144  
   145  		change := Change{
   146  			Path: path,
   147  		}
   148  
   149  		deletedFile, err := dc(rw, path, f)
   150  		if err != nil {
   151  			return err
   152  		}
   153  
   154  		// Find out what kind of modification happened
   155  		if deletedFile != "" {
   156  			change.Path = deletedFile
   157  			change.Kind = ChangeDelete
   158  		} else {
   159  			// Otherwise, the file was added
   160  			change.Kind = ChangeAdd
   161  
   162  			// ...Unless it already existed in a top layer, in which case, it's a modification
   163  			for _, layer := range layers {
   164  				stat, err := os.Stat(filepath.Join(layer, path))
   165  				if err != nil && !os.IsNotExist(err) {
   166  					return err
   167  				}
   168  				if err == nil {
   169  					// The file existed in the top layer, so that's a modification
   170  
   171  					// However, if it's a directory, maybe it wasn't actually modified.
   172  					// If you modify /foo/bar/baz, then /foo will be part of the changed files only because it's the parent of bar
   173  					if stat.IsDir() && f.IsDir() {
   174  						if f.Size() == stat.Size() && f.Mode() == stat.Mode() && sameFsTime(f.ModTime(), stat.ModTime()) {
   175  							// Both directories are the same, don't record the change
   176  							return nil
   177  						}
   178  					}
   179  					change.Kind = ChangeModify
   180  					break
   181  				}
   182  			}
   183  		}
   184  
   185  		// If /foo/bar/file.txt is modified, then /foo/bar must be part of the changed files.
   186  		// This block is here to ensure the change is recorded even if the
   187  		// modify time, mode and size of the parent directory in the rw and ro layers are all equal.
   188  		// Check https://github.com/demonoid81/moby/pull/13590 for details.
   189  		if f.IsDir() {
   190  			changedDirs[path] = struct{}{}
   191  		}
   192  		if change.Kind == ChangeAdd || change.Kind == ChangeDelete {
   193  			parent := filepath.Dir(path)
   194  			if _, ok := changedDirs[parent]; !ok && parent != "/" {
   195  				changes = append(changes, Change{Path: parent, Kind: ChangeModify})
   196  				changedDirs[parent] = struct{}{}
   197  			}
   198  		}
   199  
   200  		// Record change
   201  		changes = append(changes, change)
   202  		return nil
   203  	})
   204  	if err != nil && !os.IsNotExist(err) {
   205  		return nil, err
   206  	}
   207  	return changes, nil
   208  }
   209  
   210  // FileInfo describes the information of a file.
   211  type FileInfo struct {
   212  	parent     *FileInfo
   213  	name       string
   214  	stat       *system.StatT
   215  	children   map[string]*FileInfo
   216  	capability []byte
   217  	added      bool
   218  }
   219  
   220  // LookUp looks up the file information of a file.
   221  func (info *FileInfo) LookUp(path string) *FileInfo {
   222  	// As this runs on the daemon side, file paths are OS specific.
   223  	parent := info
   224  	if path == string(os.PathSeparator) {
   225  		return info
   226  	}
   227  
   228  	pathElements := strings.Split(path, string(os.PathSeparator))
   229  	for _, elem := range pathElements {
   230  		if elem != "" {
   231  			child := parent.children[elem]
   232  			if child == nil {
   233  				return nil
   234  			}
   235  			parent = child
   236  		}
   237  	}
   238  	return parent
   239  }
   240  
   241  func (info *FileInfo) path() string {
   242  	if info.parent == nil {
   243  		// As this runs on the daemon side, file paths are OS specific.
   244  		return string(os.PathSeparator)
   245  	}
   246  	return filepath.Join(info.parent.path(), info.name)
   247  }
   248  
   249  func (info *FileInfo) addChanges(oldInfo *FileInfo, changes *[]Change) {
   250  
   251  	sizeAtEntry := len(*changes)
   252  
   253  	if oldInfo == nil {
   254  		// add
   255  		change := Change{
   256  			Path: info.path(),
   257  			Kind: ChangeAdd,
   258  		}
   259  		*changes = append(*changes, change)
   260  		info.added = true
   261  	}
   262  
   263  	// We make a copy so we can modify it to detect additions
   264  	// also, we only recurse on the old dir if the new info is a directory
   265  	// otherwise any previous delete/change is considered recursive
   266  	oldChildren := make(map[string]*FileInfo)
   267  	if oldInfo != nil && info.isDir() {
   268  		for k, v := range oldInfo.children {
   269  			oldChildren[k] = v
   270  		}
   271  	}
   272  
   273  	for name, newChild := range info.children {
   274  		oldChild := oldChildren[name]
   275  		if oldChild != nil {
   276  			// change?
   277  			oldStat := oldChild.stat
   278  			newStat := newChild.stat
   279  			// Note: We can't compare inode or ctime or blocksize here, because these change
   280  			// when copying a file into a container. However, that is not generally a problem
   281  			// because any content change will change mtime, and any status change should
   282  			// be visible when actually comparing the stat fields. The only time this
   283  			// breaks down is if some code intentionally hides a change by setting
   284  			// back mtime
   285  			if statDifferent(oldStat, newStat) ||
   286  				!bytes.Equal(oldChild.capability, newChild.capability) {
   287  				change := Change{
   288  					Path: newChild.path(),
   289  					Kind: ChangeModify,
   290  				}
   291  				*changes = append(*changes, change)
   292  				newChild.added = true
   293  			}
   294  
   295  			// Remove from copy so we can detect deletions
   296  			delete(oldChildren, name)
   297  		}
   298  
   299  		newChild.addChanges(oldChild, changes)
   300  	}
   301  	for _, oldChild := range oldChildren {
   302  		// delete
   303  		change := Change{
   304  			Path: oldChild.path(),
   305  			Kind: ChangeDelete,
   306  		}
   307  		*changes = append(*changes, change)
   308  	}
   309  
   310  	// If there were changes inside this directory, we need to add it, even if the directory
   311  	// itself wasn't changed. This is needed to properly save and restore filesystem permissions.
   312  	// As this runs on the daemon side, file paths are OS specific.
   313  	if len(*changes) > sizeAtEntry && info.isDir() && !info.added && info.path() != string(os.PathSeparator) {
   314  		change := Change{
   315  			Path: info.path(),
   316  			Kind: ChangeModify,
   317  		}
   318  		// Let's insert the directory entry before the recently added entries located inside this dir
   319  		*changes = append(*changes, change) // just to resize the slice, will be overwritten
   320  		copy((*changes)[sizeAtEntry+1:], (*changes)[sizeAtEntry:])
   321  		(*changes)[sizeAtEntry] = change
   322  	}
   323  
   324  }
   325  
   326  // Changes add changes to file information.
   327  func (info *FileInfo) Changes(oldInfo *FileInfo) []Change {
   328  	var changes []Change
   329  
   330  	info.addChanges(oldInfo, &changes)
   331  
   332  	return changes
   333  }
   334  
   335  func newRootFileInfo() *FileInfo {
   336  	// As this runs on the daemon side, file paths are OS specific.
   337  	root := &FileInfo{
   338  		name:     string(os.PathSeparator),
   339  		children: make(map[string]*FileInfo),
   340  	}
   341  	return root
   342  }
   343  
   344  // ChangesDirs compares two directories and generates an array of Change objects describing the changes.
   345  // If oldDir is "", then all files in newDir will be Add-Changes.
   346  func ChangesDirs(newDir, oldDir string) ([]Change, error) {
   347  	var (
   348  		oldRoot, newRoot *FileInfo
   349  	)
   350  	if oldDir == "" {
   351  		emptyDir, err := ioutil.TempDir("", "empty")
   352  		if err != nil {
   353  			return nil, err
   354  		}
   355  		defer os.Remove(emptyDir)
   356  		oldDir = emptyDir
   357  	}
   358  	oldRoot, newRoot, err := collectFileInfoForChanges(oldDir, newDir)
   359  	if err != nil {
   360  		return nil, err
   361  	}
   362  
   363  	return newRoot.Changes(oldRoot), nil
   364  }
   365  
   366  // ChangesSize calculates the size in bytes of the provided changes, based on newDir.
   367  func ChangesSize(newDir string, changes []Change) int64 {
   368  	var (
   369  		size int64
   370  		sf   = make(map[uint64]struct{})
   371  	)
   372  	for _, change := range changes {
   373  		if change.Kind == ChangeModify || change.Kind == ChangeAdd {
   374  			file := filepath.Join(newDir, change.Path)
   375  			fileInfo, err := os.Lstat(file)
   376  			if err != nil {
   377  				logrus.Errorf("Can not stat %q: %s", file, err)
   378  				continue
   379  			}
   380  
   381  			if fileInfo != nil && !fileInfo.IsDir() {
   382  				if hasHardlinks(fileInfo) {
   383  					inode := getIno(fileInfo)
   384  					if _, ok := sf[inode]; !ok {
   385  						size += fileInfo.Size()
   386  						sf[inode] = struct{}{}
   387  					}
   388  				} else {
   389  					size += fileInfo.Size()
   390  				}
   391  			}
   392  		}
   393  	}
   394  	return size
   395  }
   396  
   397  // ExportChanges produces an Archive from the provided changes, relative to dir.
   398  func ExportChanges(dir string, changes []Change, uidMaps, gidMaps []idtools.IDMap) (io.ReadCloser, error) {
   399  	reader, writer := io.Pipe()
   400  	go func() {
   401  		ta := newTarAppender(idtools.NewIDMappingsFromMaps(uidMaps, gidMaps), writer, nil)
   402  
   403  		// this buffer is needed for the duration of this piped stream
   404  		defer pools.BufioWriter32KPool.Put(ta.Buffer)
   405  
   406  		sort.Sort(changesByPath(changes))
   407  
   408  		// In general we log errors here but ignore them because
   409  		// during e.g. a diff operation the container can continue
   410  		// mutating the filesystem and we can see transient errors
   411  		// from this
   412  		for _, change := range changes {
   413  			if change.Kind == ChangeDelete {
   414  				whiteOutDir := filepath.Dir(change.Path)
   415  				whiteOutBase := filepath.Base(change.Path)
   416  				whiteOut := filepath.Join(whiteOutDir, WhiteoutPrefix+whiteOutBase)
   417  				timestamp := time.Now()
   418  				hdr := &tar.Header{
   419  					Name:       whiteOut[1:],
   420  					Size:       0,
   421  					ModTime:    timestamp,
   422  					AccessTime: timestamp,
   423  					ChangeTime: timestamp,
   424  				}
   425  				if err := ta.TarWriter.WriteHeader(hdr); err != nil {
   426  					logrus.Debugf("Can't write whiteout header: %s", err)
   427  				}
   428  			} else {
   429  				path := filepath.Join(dir, change.Path)
   430  				if err := ta.addTarFile(path, change.Path[1:]); err != nil {
   431  					logrus.Debugf("Can't add file %s to tar: %s", path, err)
   432  				}
   433  			}
   434  		}
   435  
   436  		// Make sure to check the error on Close.
   437  		if err := ta.TarWriter.Close(); err != nil {
   438  			logrus.Debugf("Can't close layer: %s", err)
   439  		}
   440  		if err := writer.Close(); err != nil {
   441  			logrus.Debugf("failed close Changes writer: %s", err)
   442  		}
   443  	}()
   444  	return reader, nil
   445  }