k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/volume/util/atomic_writer.go (about) 1 /* 2 Copyright 2016 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package util 18 19 import ( 20 "bytes" 21 "fmt" 22 "os" 23 "path" 24 "path/filepath" 25 "runtime" 26 "strings" 27 "time" 28 29 "k8s.io/klog/v2" 30 31 "k8s.io/apimachinery/pkg/util/sets" 32 ) 33 34 const ( 35 maxFileNameLength = 255 36 maxPathLength = 4096 37 ) 38 39 // AtomicWriter handles atomically projecting content for a set of files into 40 // a target directory. 41 // 42 // Note: 43 // 44 // 1. AtomicWriter reserves the set of pathnames starting with `..`. 45 // 2. AtomicWriter offers no concurrency guarantees and must be synchronized 46 // by the caller. 47 // 48 // The visible files in this volume are symlinks to files in the writer's data 49 // directory. Actual files are stored in a hidden timestamped directory which 50 // is symlinked to by the data directory. The timestamped directory and 51 // data directory symlink are created in the writer's target dir. This scheme 52 // allows the files to be atomically updated by changing the target of the 53 // data directory symlink. 54 // 55 // Consumers of the target directory can monitor the ..data symlink using 56 // inotify or fanotify to receive events when the content in the volume is 57 // updated. 58 type AtomicWriter struct { 59 targetDir string 60 logContext string 61 } 62 63 // FileProjection contains file Data and access Mode 64 type FileProjection struct { 65 Data []byte 66 Mode int32 67 FsUser *int64 68 } 69 70 // NewAtomicWriter creates a new AtomicWriter configured to write to the given 71 // target directory, or returns an error if the target directory does not exist. 72 func NewAtomicWriter(targetDir string, logContext string) (*AtomicWriter, error) { 73 _, err := os.Stat(targetDir) 74 if os.IsNotExist(err) { 75 return nil, err 76 } 77 78 return &AtomicWriter{targetDir: targetDir, logContext: logContext}, nil 79 } 80 81 const ( 82 dataDirName = "..data" 83 newDataDirName = "..data_tmp" 84 ) 85 86 // Write does an atomic projection of the given payload into the writer's target 87 // directory. Input paths must not begin with '..'. 88 // setPerms is an optional pointer to a function that caller can provide to set the 89 // permissions of the newly created files before they are published. The function is 90 // passed subPath which is the name of the timestamped directory that was created 91 // under target directory. 92 // 93 // The Write algorithm is: 94 // 95 // 1. The payload is validated; if the payload is invalid, the function returns 96 // 97 // 2. The current timestamped directory is detected by reading the data directory 98 // symlink 99 // 100 // 3. The old version of the volume is walked to determine whether any 101 // portion of the payload was deleted and is still present on disk. 102 // 103 // 4. The data in the current timestamped directory is compared to the projected 104 // data to determine if an update to data directory is required. 105 // 106 // 5. A new timestamped dir is created if an update is required. 107 // 108 // 6. The payload is written to the new timestamped directory. 109 // 110 // 7. Permissions are set (if setPerms is not nil) on the new timestamped directory and files. 111 // 112 // 8. A symlink to the new timestamped directory ..data_tmp is created that will 113 // become the new data directory. 114 // 115 // 9. The new data directory symlink is renamed to the data directory; rename is atomic. 116 // 117 // 10. Symlinks and directory for new user-visible files are created (if needed). 118 // 119 // For example, consider the files: 120 // <target-dir>/podName 121 // <target-dir>/user/labels 122 // <target-dir>/k8s/annotations 123 // 124 // The user visible files are symbolic links into the internal data directory: 125 // <target-dir>/podName -> ..data/podName 126 // <target-dir>/usr -> ..data/usr 127 // <target-dir>/k8s -> ..data/k8s 128 // 129 // The data directory itself is a link to a timestamped directory with 130 // the real data: 131 // <target-dir>/..data -> ..2016_02_01_15_04_05.12345678/ 132 // NOTE(claudiub): We need to create these symlinks AFTER we've finished creating and 133 // linking everything else. On Windows, if a target does not exist, the created symlink 134 // will not work properly if the target ends up being a directory. 135 // 136 // 11. Old paths are removed from the user-visible portion of the target directory. 137 // 138 // 12. The previous timestamped directory is removed, if it exists. 139 func (w *AtomicWriter) Write(payload map[string]FileProjection, setPerms func(subPath string) error) error { 140 // (1) 141 cleanPayload, err := validatePayload(payload) 142 if err != nil { 143 klog.Errorf("%s: invalid payload: %v", w.logContext, err) 144 return err 145 } 146 147 // (2) 148 dataDirPath := filepath.Join(w.targetDir, dataDirName) 149 oldTsDir, err := os.Readlink(dataDirPath) 150 if err != nil { 151 if !os.IsNotExist(err) { 152 klog.Errorf("%s: error reading link for data directory: %v", w.logContext, err) 153 return err 154 } 155 // although Readlink() returns "" on err, don't be fragile by relying on it (since it's not specified in docs) 156 // empty oldTsDir indicates that it didn't exist 157 oldTsDir = "" 158 } 159 oldTsPath := filepath.Join(w.targetDir, oldTsDir) 160 161 var pathsToRemove sets.String 162 shouldWrite := true 163 // if there was no old version, there's nothing to remove 164 if len(oldTsDir) != 0 { 165 // (3) 166 pathsToRemove, err = w.pathsToRemove(cleanPayload, oldTsPath) 167 if err != nil { 168 klog.Errorf("%s: error determining user-visible files to remove: %v", w.logContext, err) 169 return err 170 } 171 172 // (4) 173 if should, err := shouldWritePayload(cleanPayload, oldTsPath); err != nil { 174 klog.Errorf("%s: error determining whether payload should be written to disk: %v", w.logContext, err) 175 return err 176 } else if !should && len(pathsToRemove) == 0 { 177 klog.V(4).Infof("%s: write not required for data directory %v", w.logContext, oldTsDir) 178 // data directory is already up to date, but we need to make sure that 179 // the user-visible symlinks are created. 180 // See https://github.com/kubernetes/kubernetes/issues/121472 for more details. 181 // Reset oldTsDir to empty string to avoid removing the data directory. 182 shouldWrite = false 183 oldTsDir = "" 184 } else { 185 klog.V(4).Infof("%s: write required for target directory %v", w.logContext, w.targetDir) 186 } 187 } 188 189 if shouldWrite { 190 // (5) 191 tsDir, err := w.newTimestampDir() 192 if err != nil { 193 klog.V(4).Infof("%s: error creating new ts data directory: %v", w.logContext, err) 194 return err 195 } 196 tsDirName := filepath.Base(tsDir) 197 198 // (6) 199 if err = w.writePayloadToDir(cleanPayload, tsDir); err != nil { 200 klog.Errorf("%s: error writing payload to ts data directory %s: %v", w.logContext, tsDir, err) 201 return err 202 } 203 klog.V(4).Infof("%s: performed write of new data to ts data directory: %s", w.logContext, tsDir) 204 205 // (7) 206 if setPerms != nil { 207 if err := setPerms(tsDirName); err != nil { 208 klog.Errorf("%s: error applying ownership settings: %v", w.logContext, err) 209 return err 210 } 211 } 212 213 // (8) 214 newDataDirPath := filepath.Join(w.targetDir, newDataDirName) 215 if err = os.Symlink(tsDirName, newDataDirPath); err != nil { 216 if err := os.RemoveAll(tsDir); err != nil { 217 klog.Errorf("%s: error removing new ts directory %s: %v", w.logContext, tsDir, err) 218 } 219 klog.Errorf("%s: error creating symbolic link for atomic update: %v", w.logContext, err) 220 return err 221 } 222 223 // (9) 224 if runtime.GOOS == "windows" { 225 if err := os.Remove(dataDirPath); err != nil { 226 klog.Errorf("%s: error removing data dir directory %s: %v", w.logContext, dataDirPath, err) 227 } 228 err = os.Symlink(tsDirName, dataDirPath) 229 if err := os.Remove(newDataDirPath); err != nil { 230 klog.Errorf("%s: error removing new data dir directory %s: %v", w.logContext, newDataDirPath, err) 231 } 232 } else { 233 err = os.Rename(newDataDirPath, dataDirPath) 234 } 235 if err != nil { 236 if err := os.Remove(newDataDirPath); err != nil && err != os.ErrNotExist { 237 klog.Errorf("%s: error removing new data dir directory %s: %v", w.logContext, newDataDirPath, err) 238 } 239 if err := os.RemoveAll(tsDir); err != nil { 240 klog.Errorf("%s: error removing new ts directory %s: %v", w.logContext, tsDir, err) 241 } 242 klog.Errorf("%s: error renaming symbolic link for data directory %s: %v", w.logContext, newDataDirPath, err) 243 return err 244 } 245 } 246 247 // (10) 248 if err = w.createUserVisibleFiles(cleanPayload); err != nil { 249 klog.Errorf("%s: error creating visible symlinks in %s: %v", w.logContext, w.targetDir, err) 250 return err 251 } 252 253 // (11) 254 if err = w.removeUserVisiblePaths(pathsToRemove); err != nil { 255 klog.Errorf("%s: error removing old visible symlinks: %v", w.logContext, err) 256 return err 257 } 258 259 // (12) 260 if len(oldTsDir) > 0 { 261 if err = os.RemoveAll(oldTsPath); err != nil { 262 klog.Errorf("%s: error removing old data directory %s: %v", w.logContext, oldTsDir, err) 263 return err 264 } 265 } 266 267 return nil 268 } 269 270 // validatePayload returns an error if any path in the payload returns a copy of the payload with the paths cleaned. 271 func validatePayload(payload map[string]FileProjection) (map[string]FileProjection, error) { 272 cleanPayload := make(map[string]FileProjection) 273 for k, content := range payload { 274 if err := validatePath(k); err != nil { 275 return nil, err 276 } 277 278 cleanPayload[filepath.Clean(k)] = content 279 } 280 281 return cleanPayload, nil 282 } 283 284 // validatePath validates a single path, returning an error if the path is 285 // invalid. paths may not: 286 // 287 // 1. be absolute 288 // 2. contain '..' as an element 289 // 3. start with '..' 290 // 4. contain filenames larger than 255 characters 291 // 5. be longer than 4096 characters 292 func validatePath(targetPath string) error { 293 // TODO: somehow unify this with the similar api validation, 294 // validateVolumeSourcePath; the error semantics are just different enough 295 // from this that it was time-prohibitive trying to find the right 296 // refactoring to re-use. 297 if targetPath == "" { 298 return fmt.Errorf("invalid path: must not be empty: %q", targetPath) 299 } 300 if path.IsAbs(targetPath) { 301 return fmt.Errorf("invalid path: must be relative path: %s", targetPath) 302 } 303 304 if len(targetPath) > maxPathLength { 305 return fmt.Errorf("invalid path: must be less than or equal to %d characters", maxPathLength) 306 } 307 308 items := strings.Split(targetPath, string(os.PathSeparator)) 309 for _, item := range items { 310 if item == ".." { 311 return fmt.Errorf("invalid path: must not contain '..': %s", targetPath) 312 } 313 if len(item) > maxFileNameLength { 314 return fmt.Errorf("invalid path: filenames must be less than or equal to %d characters", maxFileNameLength) 315 } 316 } 317 if strings.HasPrefix(items[0], "..") && len(items[0]) > 2 { 318 return fmt.Errorf("invalid path: must not start with '..': %s", targetPath) 319 } 320 321 return nil 322 } 323 324 // shouldWritePayload returns whether the payload should be written to disk. 325 func shouldWritePayload(payload map[string]FileProjection, oldTsDir string) (bool, error) { 326 for userVisiblePath, fileProjection := range payload { 327 shouldWrite, err := shouldWriteFile(filepath.Join(oldTsDir, userVisiblePath), fileProjection.Data) 328 if err != nil { 329 return false, err 330 } 331 332 if shouldWrite { 333 return true, nil 334 } 335 } 336 337 return false, nil 338 } 339 340 // shouldWriteFile returns whether a new version of a file should be written to disk. 341 func shouldWriteFile(path string, content []byte) (bool, error) { 342 _, err := os.Lstat(path) 343 if os.IsNotExist(err) { 344 return true, nil 345 } 346 347 contentOnFs, err := os.ReadFile(path) 348 if err != nil { 349 return false, err 350 } 351 352 return !bytes.Equal(content, contentOnFs), nil 353 } 354 355 // pathsToRemove walks the current version of the data directory and 356 // determines which paths should be removed (if any) after the payload is 357 // written to the target directory. 358 func (w *AtomicWriter) pathsToRemove(payload map[string]FileProjection, oldTsDir string) (sets.String, error) { 359 paths := sets.NewString() 360 visitor := func(path string, info os.FileInfo, err error) error { 361 relativePath := strings.TrimPrefix(path, oldTsDir) 362 relativePath = strings.TrimPrefix(relativePath, string(os.PathSeparator)) 363 if relativePath == "" { 364 return nil 365 } 366 367 paths.Insert(relativePath) 368 return nil 369 } 370 371 err := filepath.Walk(oldTsDir, visitor) 372 if os.IsNotExist(err) { 373 return nil, nil 374 } else if err != nil { 375 return nil, err 376 } 377 klog.V(5).Infof("%s: current paths: %+v", w.targetDir, paths.List()) 378 379 newPaths := sets.NewString() 380 for file := range payload { 381 // add all subpaths for the payload to the set of new paths 382 // to avoid attempting to remove non-empty dirs 383 for subPath := file; subPath != ""; { 384 newPaths.Insert(subPath) 385 subPath, _ = filepath.Split(subPath) 386 subPath = strings.TrimSuffix(subPath, string(os.PathSeparator)) 387 } 388 } 389 klog.V(5).Infof("%s: new paths: %+v", w.targetDir, newPaths.List()) 390 391 result := paths.Difference(newPaths) 392 klog.V(5).Infof("%s: paths to remove: %+v", w.targetDir, result) 393 394 return result, nil 395 } 396 397 // newTimestampDir creates a new timestamp directory 398 func (w *AtomicWriter) newTimestampDir() (string, error) { 399 tsDir, err := os.MkdirTemp(w.targetDir, time.Now().UTC().Format("..2006_01_02_15_04_05.")) 400 if err != nil { 401 klog.Errorf("%s: unable to create new temp directory: %v", w.logContext, err) 402 return "", err 403 } 404 405 // 0755 permissions are needed to allow 'group' and 'other' to recurse the 406 // directory tree. do a chmod here to ensure that permissions are set correctly 407 // regardless of the process' umask. 408 err = os.Chmod(tsDir, 0755) 409 if err != nil { 410 klog.Errorf("%s: unable to set mode on new temp directory: %v", w.logContext, err) 411 return "", err 412 } 413 414 return tsDir, nil 415 } 416 417 // writePayloadToDir writes the given payload to the given directory. The 418 // directory must exist. 419 func (w *AtomicWriter) writePayloadToDir(payload map[string]FileProjection, dir string) error { 420 for userVisiblePath, fileProjection := range payload { 421 content := fileProjection.Data 422 mode := os.FileMode(fileProjection.Mode) 423 fullPath := filepath.Join(dir, userVisiblePath) 424 baseDir, _ := filepath.Split(fullPath) 425 426 if err := os.MkdirAll(baseDir, os.ModePerm); err != nil { 427 klog.Errorf("%s: unable to create directory %s: %v", w.logContext, baseDir, err) 428 return err 429 } 430 431 if err := os.WriteFile(fullPath, content, mode); err != nil { 432 klog.Errorf("%s: unable to write file %s with mode %v: %v", w.logContext, fullPath, mode, err) 433 return err 434 } 435 // Chmod is needed because os.WriteFile() ends up calling 436 // open(2) to create the file, so the final mode used is "mode & 437 // ~umask". But we want to make sure the specified mode is used 438 // in the file no matter what the umask is. 439 if err := os.Chmod(fullPath, mode); err != nil { 440 klog.Errorf("%s: unable to change file %s with mode %v: %v", w.logContext, fullPath, mode, err) 441 return err 442 } 443 444 if fileProjection.FsUser == nil { 445 continue 446 } 447 if err := os.Chown(fullPath, int(*fileProjection.FsUser), -1); err != nil { 448 klog.Errorf("%s: unable to change file %s with owner %v: %v", w.logContext, fullPath, int(*fileProjection.FsUser), err) 449 return err 450 } 451 } 452 453 return nil 454 } 455 456 // createUserVisibleFiles creates the relative symlinks for all the 457 // files configured in the payload. If the directory in a file path does not 458 // exist, it is created. 459 // 460 // Viz: 461 // For files: "bar", "foo/bar", "baz/bar", "foo/baz/blah" 462 // the following symlinks are created: 463 // bar -> ..data/bar 464 // foo -> ..data/foo 465 // baz -> ..data/baz 466 func (w *AtomicWriter) createUserVisibleFiles(payload map[string]FileProjection) error { 467 for userVisiblePath := range payload { 468 slashpos := strings.Index(userVisiblePath, string(os.PathSeparator)) 469 if slashpos == -1 { 470 slashpos = len(userVisiblePath) 471 } 472 linkname := userVisiblePath[:slashpos] 473 _, err := os.Readlink(filepath.Join(w.targetDir, linkname)) 474 if err != nil && os.IsNotExist(err) { 475 // The link into the data directory for this path doesn't exist; create it 476 visibleFile := filepath.Join(w.targetDir, linkname) 477 dataDirFile := filepath.Join(dataDirName, linkname) 478 479 err = os.Symlink(dataDirFile, visibleFile) 480 if err != nil { 481 return err 482 } 483 } 484 } 485 return nil 486 } 487 488 // removeUserVisiblePaths removes the set of paths from the user-visible 489 // portion of the writer's target directory. 490 func (w *AtomicWriter) removeUserVisiblePaths(paths sets.String) error { 491 ps := string(os.PathSeparator) 492 var lasterr error 493 for p := range paths { 494 // only remove symlinks from the volume root directory (i.e. items that don't contain '/') 495 if strings.Contains(p, ps) { 496 continue 497 } 498 if err := os.Remove(filepath.Join(w.targetDir, p)); err != nil { 499 klog.Errorf("%s: error pruning old user-visible path %s: %v", w.logContext, p, err) 500 lasterr = err 501 } 502 } 503 504 return lasterr 505 }