github.com/Prakhar-Agarwal-byte/moby@v0.0.0-20231027092010-a14e3e8ab87e/pkg/archive/changes_linux.go (about)

     1  //go:build !wasip1
     2  
     3  package archive // import "github.com/Prakhar-Agarwal-byte/moby/pkg/archive"
     4  
     5  import (
     6  	"bytes"
     7  	"fmt"
     8  	"os"
     9  	"path/filepath"
    10  	"sort"
    11  	"syscall"
    12  	"unsafe"
    13  
    14  	"github.com/Prakhar-Agarwal-byte/moby/pkg/system"
    15  	"golang.org/x/sys/unix"
    16  )
    17  
    18  // walker is used to implement collectFileInfoForChanges on linux. Where this
    19  // method in general returns the entire contents of two directory trees, we
    20  // optimize some FS calls out on linux. In particular, we take advantage of the
    21  // fact that getdents(2) returns the inode of each file in the directory being
    22  // walked, which, when walking two trees in parallel to generate a list of
    23  // changes, can be used to prune subtrees without ever having to lstat(2) them
    24  // directly. Eliminating stat calls in this way can save up to seconds on large
    25  // images.
    26  type walker struct {
    27  	dir1  string
    28  	dir2  string
    29  	root1 *FileInfo
    30  	root2 *FileInfo
    31  }
    32  
    33  // collectFileInfoForChanges returns a complete representation of the trees
    34  // rooted at dir1 and dir2, with one important exception: any subtree or
    35  // leaf where the inode and device numbers are an exact match between dir1
    36  // and dir2 will be pruned from the results. This method is *only* to be used
    37  // to generating a list of changes between the two directories, as it does not
    38  // reflect the full contents.
    39  func collectFileInfoForChanges(dir1, dir2 string) (*FileInfo, *FileInfo, error) {
    40  	w := &walker{
    41  		dir1:  dir1,
    42  		dir2:  dir2,
    43  		root1: newRootFileInfo(),
    44  		root2: newRootFileInfo(),
    45  	}
    46  
    47  	i1, err := os.Lstat(w.dir1)
    48  	if err != nil {
    49  		return nil, nil, err
    50  	}
    51  	i2, err := os.Lstat(w.dir2)
    52  	if err != nil {
    53  		return nil, nil, err
    54  	}
    55  
    56  	if err := w.walk("/", i1, i2); err != nil {
    57  		return nil, nil, err
    58  	}
    59  
    60  	return w.root1, w.root2, nil
    61  }
    62  
    63  // Given a FileInfo, its path info, and a reference to the root of the tree
    64  // being constructed, register this file with the tree.
    65  func walkchunk(path string, fi os.FileInfo, dir string, root *FileInfo) error {
    66  	if fi == nil {
    67  		return nil
    68  	}
    69  	parent := root.LookUp(filepath.Dir(path))
    70  	if parent == nil {
    71  		return fmt.Errorf("walkchunk: Unexpectedly no parent for %s", path)
    72  	}
    73  	info := &FileInfo{
    74  		name:     filepath.Base(path),
    75  		children: make(map[string]*FileInfo),
    76  		parent:   parent,
    77  	}
    78  	cpath := filepath.Join(dir, path)
    79  	stat, err := system.FromStatT(fi.Sys().(*syscall.Stat_t))
    80  	if err != nil {
    81  		return err
    82  	}
    83  	info.stat = stat
    84  	info.capability, _ = system.Lgetxattr(cpath, "security.capability") // lgetxattr(2): fs access
    85  	parent.children[info.name] = info
    86  	return nil
    87  }
    88  
    89  // Walk a subtree rooted at the same path in both trees being iterated. For
    90  // example, /docker/overlay/1234/a/b/c/d and /docker/overlay/8888/a/b/c/d
    91  func (w *walker) walk(path string, i1, i2 os.FileInfo) (err error) {
    92  	// Register these nodes with the return trees, unless we're still at the
    93  	// (already-created) roots:
    94  	if path != "/" {
    95  		if err := walkchunk(path, i1, w.dir1, w.root1); err != nil {
    96  			return err
    97  		}
    98  		if err := walkchunk(path, i2, w.dir2, w.root2); err != nil {
    99  			return err
   100  		}
   101  	}
   102  
   103  	is1Dir := i1 != nil && i1.IsDir()
   104  	is2Dir := i2 != nil && i2.IsDir()
   105  
   106  	sameDevice := false
   107  	if i1 != nil && i2 != nil {
   108  		si1 := i1.Sys().(*syscall.Stat_t)
   109  		si2 := i2.Sys().(*syscall.Stat_t)
   110  		if si1.Dev == si2.Dev {
   111  			sameDevice = true
   112  		}
   113  	}
   114  
   115  	// If these files are both non-existent, or leaves (non-dirs), we are done.
   116  	if !is1Dir && !is2Dir {
   117  		return nil
   118  	}
   119  
   120  	// Fetch the names of all the files contained in both directories being walked:
   121  	var names1, names2 []nameIno
   122  	if is1Dir {
   123  		names1, err = readdirnames(filepath.Join(w.dir1, path)) // getdents(2): fs access
   124  		if err != nil {
   125  			return err
   126  		}
   127  	}
   128  	if is2Dir {
   129  		names2, err = readdirnames(filepath.Join(w.dir2, path)) // getdents(2): fs access
   130  		if err != nil {
   131  			return err
   132  		}
   133  	}
   134  
   135  	// We have lists of the files contained in both parallel directories, sorted
   136  	// in the same order. Walk them in parallel, generating a unique merged list
   137  	// of all items present in either or both directories.
   138  	var names []string
   139  	ix1 := 0
   140  	ix2 := 0
   141  
   142  	for {
   143  		if ix1 >= len(names1) {
   144  			break
   145  		}
   146  		if ix2 >= len(names2) {
   147  			break
   148  		}
   149  
   150  		ni1 := names1[ix1]
   151  		ni2 := names2[ix2]
   152  
   153  		switch bytes.Compare([]byte(ni1.name), []byte(ni2.name)) {
   154  		case -1: // ni1 < ni2 -- advance ni1
   155  			// we will not encounter ni1 in names2
   156  			names = append(names, ni1.name)
   157  			ix1++
   158  		case 0: // ni1 == ni2
   159  			if ni1.ino != ni2.ino || !sameDevice {
   160  				names = append(names, ni1.name)
   161  			}
   162  			ix1++
   163  			ix2++
   164  		case 1: // ni1 > ni2 -- advance ni2
   165  			// we will not encounter ni2 in names1
   166  			names = append(names, ni2.name)
   167  			ix2++
   168  		}
   169  	}
   170  	for ix1 < len(names1) {
   171  		names = append(names, names1[ix1].name)
   172  		ix1++
   173  	}
   174  	for ix2 < len(names2) {
   175  		names = append(names, names2[ix2].name)
   176  		ix2++
   177  	}
   178  
   179  	// For each of the names present in either or both of the directories being
   180  	// iterated, stat the name under each root, and recurse the pair of them:
   181  	for _, name := range names {
   182  		fname := filepath.Join(path, name)
   183  		var cInfo1, cInfo2 os.FileInfo
   184  		if is1Dir {
   185  			cInfo1, err = os.Lstat(filepath.Join(w.dir1, fname)) // lstat(2): fs access
   186  			if err != nil && !os.IsNotExist(err) {
   187  				return err
   188  			}
   189  		}
   190  		if is2Dir {
   191  			cInfo2, err = os.Lstat(filepath.Join(w.dir2, fname)) // lstat(2): fs access
   192  			if err != nil && !os.IsNotExist(err) {
   193  				return err
   194  			}
   195  		}
   196  		if err = w.walk(fname, cInfo1, cInfo2); err != nil {
   197  			return err
   198  		}
   199  	}
   200  	return nil
   201  }
   202  
   203  // {name,inode} pairs used to support the early-pruning logic of the walker type
   204  type nameIno struct {
   205  	name string
   206  	ino  uint64
   207  }
   208  
   209  type nameInoSlice []nameIno
   210  
   211  func (s nameInoSlice) Len() int           { return len(s) }
   212  func (s nameInoSlice) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
   213  func (s nameInoSlice) Less(i, j int) bool { return s[i].name < s[j].name }
   214  
   215  // readdirnames is a hacked-apart version of the Go stdlib code, exposing inode
   216  // numbers further up the stack when reading directory contents. Unlike
   217  // os.Readdirnames, which returns a list of filenames, this function returns a
   218  // list of {filename,inode} pairs.
   219  func readdirnames(dirname string) (names []nameIno, err error) {
   220  	var (
   221  		size = 100
   222  		buf  = make([]byte, 4096)
   223  		nbuf int
   224  		bufp int
   225  		nb   int
   226  	)
   227  
   228  	f, err := os.Open(dirname)
   229  	if err != nil {
   230  		return nil, err
   231  	}
   232  	defer f.Close()
   233  
   234  	names = make([]nameIno, 0, size) // Empty with room to grow.
   235  	for {
   236  		// Refill the buffer if necessary
   237  		if bufp >= nbuf {
   238  			bufp = 0
   239  			nbuf, err = unix.ReadDirent(int(f.Fd()), buf) // getdents on linux
   240  			if nbuf < 0 {
   241  				nbuf = 0
   242  			}
   243  			if err != nil {
   244  				return nil, os.NewSyscallError("readdirent", err)
   245  			}
   246  			if nbuf <= 0 {
   247  				break // EOF
   248  			}
   249  		}
   250  
   251  		// Drain the buffer
   252  		nb, names = parseDirent(buf[bufp:nbuf], names)
   253  		bufp += nb
   254  	}
   255  
   256  	sl := nameInoSlice(names)
   257  	sort.Sort(sl)
   258  	return sl, nil
   259  }
   260  
   261  // parseDirent is a minor modification of unix.ParseDirent (linux version)
   262  // which returns {name,inode} pairs instead of just names.
   263  func parseDirent(buf []byte, names []nameIno) (consumed int, newnames []nameIno) {
   264  	origlen := len(buf)
   265  	for len(buf) > 0 {
   266  		dirent := (*unix.Dirent)(unsafe.Pointer(&buf[0]))
   267  		buf = buf[dirent.Reclen:]
   268  		if dirent.Ino == 0 { // File absent in directory.
   269  			continue
   270  		}
   271  		bytes := (*[10000]byte)(unsafe.Pointer(&dirent.Name[0]))
   272  		name := string(bytes[0:clen(bytes[:])])
   273  		if name == "." || name == ".." { // Useless names
   274  			continue
   275  		}
   276  		names = append(names, nameIno{name, dirent.Ino})
   277  	}
   278  	return origlen - len(buf), names
   279  }
   280  
   281  func clen(n []byte) int {
   282  	for i := 0; i < len(n); i++ {
   283  		if n[i] == 0 {
   284  			return i
   285  		}
   286  	}
   287  	return len(n)
   288  }