github.com/Prakhar-Agarwal-byte/moby@v0.0.0-20231027092010-a14e3e8ab87e/pkg/archive/changes_linux.go (about) 1 //go:build !wasip1 2 3 package archive // import "github.com/Prakhar-Agarwal-byte/moby/pkg/archive" 4 5 import ( 6 "bytes" 7 "fmt" 8 "os" 9 "path/filepath" 10 "sort" 11 "syscall" 12 "unsafe" 13 14 "github.com/Prakhar-Agarwal-byte/moby/pkg/system" 15 "golang.org/x/sys/unix" 16 ) 17 18 // walker is used to implement collectFileInfoForChanges on linux. Where this 19 // method in general returns the entire contents of two directory trees, we 20 // optimize some FS calls out on linux. In particular, we take advantage of the 21 // fact that getdents(2) returns the inode of each file in the directory being 22 // walked, which, when walking two trees in parallel to generate a list of 23 // changes, can be used to prune subtrees without ever having to lstat(2) them 24 // directly. Eliminating stat calls in this way can save up to seconds on large 25 // images. 26 type walker struct { 27 dir1 string 28 dir2 string 29 root1 *FileInfo 30 root2 *FileInfo 31 } 32 33 // collectFileInfoForChanges returns a complete representation of the trees 34 // rooted at dir1 and dir2, with one important exception: any subtree or 35 // leaf where the inode and device numbers are an exact match between dir1 36 // and dir2 will be pruned from the results. This method is *only* to be used 37 // to generating a list of changes between the two directories, as it does not 38 // reflect the full contents. 39 func collectFileInfoForChanges(dir1, dir2 string) (*FileInfo, *FileInfo, error) { 40 w := &walker{ 41 dir1: dir1, 42 dir2: dir2, 43 root1: newRootFileInfo(), 44 root2: newRootFileInfo(), 45 } 46 47 i1, err := os.Lstat(w.dir1) 48 if err != nil { 49 return nil, nil, err 50 } 51 i2, err := os.Lstat(w.dir2) 52 if err != nil { 53 return nil, nil, err 54 } 55 56 if err := w.walk("/", i1, i2); err != nil { 57 return nil, nil, err 58 } 59 60 return w.root1, w.root2, nil 61 } 62 63 // Given a FileInfo, its path info, and a reference to the root of the tree 64 // being constructed, register this file with the tree. 65 func walkchunk(path string, fi os.FileInfo, dir string, root *FileInfo) error { 66 if fi == nil { 67 return nil 68 } 69 parent := root.LookUp(filepath.Dir(path)) 70 if parent == nil { 71 return fmt.Errorf("walkchunk: Unexpectedly no parent for %s", path) 72 } 73 info := &FileInfo{ 74 name: filepath.Base(path), 75 children: make(map[string]*FileInfo), 76 parent: parent, 77 } 78 cpath := filepath.Join(dir, path) 79 stat, err := system.FromStatT(fi.Sys().(*syscall.Stat_t)) 80 if err != nil { 81 return err 82 } 83 info.stat = stat 84 info.capability, _ = system.Lgetxattr(cpath, "security.capability") // lgetxattr(2): fs access 85 parent.children[info.name] = info 86 return nil 87 } 88 89 // Walk a subtree rooted at the same path in both trees being iterated. For 90 // example, /docker/overlay/1234/a/b/c/d and /docker/overlay/8888/a/b/c/d 91 func (w *walker) walk(path string, i1, i2 os.FileInfo) (err error) { 92 // Register these nodes with the return trees, unless we're still at the 93 // (already-created) roots: 94 if path != "/" { 95 if err := walkchunk(path, i1, w.dir1, w.root1); err != nil { 96 return err 97 } 98 if err := walkchunk(path, i2, w.dir2, w.root2); err != nil { 99 return err 100 } 101 } 102 103 is1Dir := i1 != nil && i1.IsDir() 104 is2Dir := i2 != nil && i2.IsDir() 105 106 sameDevice := false 107 if i1 != nil && i2 != nil { 108 si1 := i1.Sys().(*syscall.Stat_t) 109 si2 := i2.Sys().(*syscall.Stat_t) 110 if si1.Dev == si2.Dev { 111 sameDevice = true 112 } 113 } 114 115 // If these files are both non-existent, or leaves (non-dirs), we are done. 116 if !is1Dir && !is2Dir { 117 return nil 118 } 119 120 // Fetch the names of all the files contained in both directories being walked: 121 var names1, names2 []nameIno 122 if is1Dir { 123 names1, err = readdirnames(filepath.Join(w.dir1, path)) // getdents(2): fs access 124 if err != nil { 125 return err 126 } 127 } 128 if is2Dir { 129 names2, err = readdirnames(filepath.Join(w.dir2, path)) // getdents(2): fs access 130 if err != nil { 131 return err 132 } 133 } 134 135 // We have lists of the files contained in both parallel directories, sorted 136 // in the same order. Walk them in parallel, generating a unique merged list 137 // of all items present in either or both directories. 138 var names []string 139 ix1 := 0 140 ix2 := 0 141 142 for { 143 if ix1 >= len(names1) { 144 break 145 } 146 if ix2 >= len(names2) { 147 break 148 } 149 150 ni1 := names1[ix1] 151 ni2 := names2[ix2] 152 153 switch bytes.Compare([]byte(ni1.name), []byte(ni2.name)) { 154 case -1: // ni1 < ni2 -- advance ni1 155 // we will not encounter ni1 in names2 156 names = append(names, ni1.name) 157 ix1++ 158 case 0: // ni1 == ni2 159 if ni1.ino != ni2.ino || !sameDevice { 160 names = append(names, ni1.name) 161 } 162 ix1++ 163 ix2++ 164 case 1: // ni1 > ni2 -- advance ni2 165 // we will not encounter ni2 in names1 166 names = append(names, ni2.name) 167 ix2++ 168 } 169 } 170 for ix1 < len(names1) { 171 names = append(names, names1[ix1].name) 172 ix1++ 173 } 174 for ix2 < len(names2) { 175 names = append(names, names2[ix2].name) 176 ix2++ 177 } 178 179 // For each of the names present in either or both of the directories being 180 // iterated, stat the name under each root, and recurse the pair of them: 181 for _, name := range names { 182 fname := filepath.Join(path, name) 183 var cInfo1, cInfo2 os.FileInfo 184 if is1Dir { 185 cInfo1, err = os.Lstat(filepath.Join(w.dir1, fname)) // lstat(2): fs access 186 if err != nil && !os.IsNotExist(err) { 187 return err 188 } 189 } 190 if is2Dir { 191 cInfo2, err = os.Lstat(filepath.Join(w.dir2, fname)) // lstat(2): fs access 192 if err != nil && !os.IsNotExist(err) { 193 return err 194 } 195 } 196 if err = w.walk(fname, cInfo1, cInfo2); err != nil { 197 return err 198 } 199 } 200 return nil 201 } 202 203 // {name,inode} pairs used to support the early-pruning logic of the walker type 204 type nameIno struct { 205 name string 206 ino uint64 207 } 208 209 type nameInoSlice []nameIno 210 211 func (s nameInoSlice) Len() int { return len(s) } 212 func (s nameInoSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } 213 func (s nameInoSlice) Less(i, j int) bool { return s[i].name < s[j].name } 214 215 // readdirnames is a hacked-apart version of the Go stdlib code, exposing inode 216 // numbers further up the stack when reading directory contents. Unlike 217 // os.Readdirnames, which returns a list of filenames, this function returns a 218 // list of {filename,inode} pairs. 219 func readdirnames(dirname string) (names []nameIno, err error) { 220 var ( 221 size = 100 222 buf = make([]byte, 4096) 223 nbuf int 224 bufp int 225 nb int 226 ) 227 228 f, err := os.Open(dirname) 229 if err != nil { 230 return nil, err 231 } 232 defer f.Close() 233 234 names = make([]nameIno, 0, size) // Empty with room to grow. 235 for { 236 // Refill the buffer if necessary 237 if bufp >= nbuf { 238 bufp = 0 239 nbuf, err = unix.ReadDirent(int(f.Fd()), buf) // getdents on linux 240 if nbuf < 0 { 241 nbuf = 0 242 } 243 if err != nil { 244 return nil, os.NewSyscallError("readdirent", err) 245 } 246 if nbuf <= 0 { 247 break // EOF 248 } 249 } 250 251 // Drain the buffer 252 nb, names = parseDirent(buf[bufp:nbuf], names) 253 bufp += nb 254 } 255 256 sl := nameInoSlice(names) 257 sort.Sort(sl) 258 return sl, nil 259 } 260 261 // parseDirent is a minor modification of unix.ParseDirent (linux version) 262 // which returns {name,inode} pairs instead of just names. 263 func parseDirent(buf []byte, names []nameIno) (consumed int, newnames []nameIno) { 264 origlen := len(buf) 265 for len(buf) > 0 { 266 dirent := (*unix.Dirent)(unsafe.Pointer(&buf[0])) 267 buf = buf[dirent.Reclen:] 268 if dirent.Ino == 0 { // File absent in directory. 269 continue 270 } 271 bytes := (*[10000]byte)(unsafe.Pointer(&dirent.Name[0])) 272 name := string(bytes[0:clen(bytes[:])]) 273 if name == "." || name == ".." { // Useless names 274 continue 275 } 276 names = append(names, nameIno{name, dirent.Ino}) 277 } 278 return origlen - len(buf), names 279 } 280 281 func clen(n []byte) int { 282 for i := 0; i < len(n); i++ { 283 if n[i] == 0 { 284 return i 285 } 286 } 287 return len(n) 288 }