gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/runsc/container/state_file.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package container 16 17 import ( 18 "encoding/json" 19 "errors" 20 "fmt" 21 "io/ioutil" 22 "os" 23 "path/filepath" 24 "regexp" 25 "strings" 26 27 "github.com/gofrs/flock" 28 "gvisor.dev/gvisor/pkg/log" 29 "gvisor.dev/gvisor/pkg/sync" 30 ) 31 32 const stateFileExtension = "state" 33 34 // ErrStateFileLocked is returned by Load() when the state file is locked 35 // and TryLock is enabled. 36 var ErrStateFileLocked = errors.New("state file locked") 37 38 // TryLock represents whether we should block waiting for the lock to be acquired or not. 39 type TryLock bool 40 41 const ( 42 // BlockAcquire means we will block until the lock can be acquired. 43 BlockAcquire TryLock = false 44 45 // TryAcquire means we will fail fast if the lock cannot be acquired. 46 TryAcquire TryLock = true 47 ) 48 49 // LoadOpts provides options for Load()ing a container. 50 type LoadOpts struct { 51 // Exact tells whether the search should be exact. See Load() for more. 52 Exact bool 53 54 // SkipCheck tells Load() to skip checking if container is runnning. 55 SkipCheck bool 56 57 // TryLock tells Load() to fail if the container state file cannot be locked, 58 // as opposed to blocking until it is available. 59 // When the state file cannot be locked, it will error with ErrStateFileLocked. 60 TryLock TryLock 61 62 // RootContainer when true matches the search only with the root container of 63 // a sandbox. This is used when looking for a sandbox given that root 64 // container and sandbox share the same ID. 65 RootContainer bool 66 } 67 68 // Load loads a container with the given id from a metadata file. "id" may 69 // be an abbreviation of the full container id in case LoadOpts.Exact if not 70 // set. It also checks if the container is still running, in order to return 71 // an error to the caller earlier. This check is skipped if LoadOpts.SkipCheck 72 // is set. 73 // 74 // Returns ErrNotExist if no container is found. Returns error in case more than 75 // one containers matching the ID prefix is found. 76 func Load(rootDir string, id FullID, opts LoadOpts) (*Container, error) { 77 log.Debugf("Load container, rootDir: %q, id: %+v, opts: %+v", rootDir, id, opts) 78 if !opts.Exact { 79 var err error 80 id, err = findContainerID(rootDir, id.ContainerID) 81 if err != nil { 82 // Preserve error so that callers can distinguish 'not found' errors. 83 return nil, err 84 } 85 } 86 87 if err := id.validate(); err != nil { 88 return nil, fmt.Errorf("invalid container id: %v", err) 89 } 90 state := StateFile{ 91 RootDir: rootDir, 92 ID: id, 93 } 94 defer state.close() 95 96 c := &Container{} 97 if err := state.load(c, opts); err != nil { 98 if os.IsNotExist(err) { 99 // Preserve error so that callers can distinguish 'not found' errors. 100 return nil, err 101 } 102 return nil, fmt.Errorf("reading container metadata file %q: %v", state.statePath(), err) 103 } 104 105 if opts.RootContainer && c.ID != c.Sandbox.ID { 106 return nil, fmt.Errorf("ID %q doesn't belong to a sandbox", id) 107 } 108 109 if !opts.SkipCheck { 110 // If the status is "Running" or "Created", check that the sandbox/container 111 // is still running, setting it to Stopped if not. 112 // 113 // This is inherently racy. 114 switch c.Status { 115 case Created, Running: 116 c.CheckStopped() 117 } 118 } 119 120 return c, nil 121 } 122 123 // List returns all container ids in the given root directory. 124 func List(rootDir string) ([]FullID, error) { 125 log.Debugf("List containers %q", rootDir) 126 return listMatch(rootDir, FullID{}) 127 } 128 129 // ListSandboxes returns all sandbox ids in the given root directory. 130 func ListSandboxes(rootDir string) ([]FullID, error) { 131 log.Debugf("List containers %q", rootDir) 132 ids, err := List(rootDir) 133 if err != nil { 134 return nil, err 135 } 136 137 sandboxes := make(map[string]struct{}, len(ids)) 138 for _, id := range ids { 139 sandboxes[id.SandboxID] = struct{}{} 140 } 141 // Reset ids to list only sandboxes. 142 ids = nil 143 for id := range sandboxes { 144 ids = append(ids, FullID{SandboxID: id, ContainerID: id}) 145 } 146 return ids, nil 147 } 148 149 // listMatch returns all container ids that match the provided id. 150 func listMatch(rootDir string, id FullID) ([]FullID, error) { 151 id.SandboxID += "*" 152 id.ContainerID += "*" 153 pattern := buildPath(rootDir, id, stateFileExtension) 154 list, err := filepath.Glob(pattern) 155 if err != nil { 156 return nil, err 157 } 158 var out []FullID 159 for _, path := range list { 160 id, err := parseFileName(filepath.Base(path)) 161 if err == nil { 162 out = append(out, id) 163 } 164 } 165 return out, nil 166 } 167 168 // LoadSandbox loads all containers that belong to the sandbox with the given 169 // ID. 170 func LoadSandbox(rootDir, id string, opts LoadOpts) ([]*Container, error) { 171 cids, err := listMatch(rootDir, FullID{SandboxID: id}) 172 if err != nil { 173 return nil, err 174 } 175 176 // Override load options that don't make sense in the context of this function. 177 opts.SkipCheck = true // We're loading all containers irrespective of status. 178 opts.RootContainer = false // We're loading all containers, not just the root one. 179 opts.Exact = true // We'll iterate over exact container IDs below. 180 181 // Load the container metadata. 182 var containers []*Container 183 for _, cid := range cids { 184 container, err := Load(rootDir, cid, opts) 185 if err != nil { 186 // Container file may not exist if it raced with creation/deletion or 187 // directory was left behind. Load provides a snapshot in time, so it's 188 // fine to skip it. 189 if os.IsNotExist(err) { 190 continue 191 } 192 return nil, fmt.Errorf("loading sandbox %q, failed to load container %q: %v", id, cid, err) 193 } 194 containers = append(containers, container) 195 } 196 return containers, nil 197 } 198 199 func findContainerID(rootDir, partialID string) (FullID, error) { 200 // Check whether the id fully specifies an existing container. 201 pattern := buildPath(rootDir, FullID{SandboxID: "*", ContainerID: partialID + "*"}, stateFileExtension) 202 list, err := filepath.Glob(pattern) 203 if err != nil { 204 return FullID{}, err 205 } 206 switch len(list) { 207 case 0: 208 return FullID{}, os.ErrNotExist 209 case 1: 210 return parseFileName(filepath.Base(list[0])) 211 } 212 213 // Now see whether id could be an abbreviation of exactly 1 of the 214 // container ids. If id is ambiguous (it could match more than 1 215 // container), it is an error. 216 ids, err := List(rootDir) 217 if err != nil { 218 return FullID{}, err 219 } 220 var rv *FullID 221 for _, id := range ids { 222 if strings.HasPrefix(id.ContainerID, partialID) { 223 if rv != nil { 224 return FullID{}, fmt.Errorf("id %q is ambiguous and could refer to multiple containers: %q, %q", partialID, rv, id) 225 } 226 rv = &id 227 } 228 } 229 if rv == nil { 230 return FullID{}, os.ErrNotExist 231 } 232 log.Debugf("abbreviated id %q resolves to full id %v", partialID, *rv) 233 return *rv, nil 234 } 235 236 func parseFileName(name string) (FullID, error) { 237 re := regexp.MustCompile(`([\w+-\.]+)_sandbox:([\w+-\.]+)\.` + stateFileExtension) 238 groups := re.FindStringSubmatch(name) 239 if len(groups) != 3 { 240 return FullID{}, fmt.Errorf("invalid state file name format: %q", name) 241 } 242 id := FullID{ 243 SandboxID: groups[2], 244 ContainerID: groups[1], 245 } 246 if err := id.validate(); err != nil { 247 return FullID{}, fmt.Errorf("invalid state file name %q: %w", name, err) 248 } 249 return id, nil 250 } 251 252 // FullID combines sandbox and container ID to identify a container. Sandbox ID 253 // is used to allow all containers for a given sandbox to be loaded by matching 254 // sandbox ID in the file name. 255 type FullID struct { 256 SandboxID string `json:"sandboxId"` 257 ContainerID string `json:"containerId"` 258 } 259 260 func (f *FullID) String() string { 261 return f.SandboxID + "/" + f.ContainerID 262 } 263 264 func (f *FullID) validate() error { 265 if err := validateID(f.SandboxID); err != nil { 266 return err 267 } 268 return validateID(f.ContainerID) 269 } 270 271 // StateFile handles load from/save to container state safely from multiple 272 // processes. It uses a lock file to provide synchronization between operations. 273 // 274 // The lock file is located at: "${s.RootDir}/${containerd-id}_sand:{sandbox-id}.lock". 275 // The state file is located at: "${s.RootDir}/${containerd-id}_sand:{sandbox-id}.state". 276 type StateFile struct { 277 // RootDir is the directory containing the container metadata file. 278 RootDir string `json:"rootDir"` 279 280 // ID is the sandbox+container ID. 281 ID FullID `json:"id"` 282 283 // 284 // Fields below this line are not saved in the state file and will not 285 // be preserved across commands. 286 // 287 288 once sync.Once `nojson:"true"` 289 flock *flock.Flock `nojson:"true"` 290 } 291 292 // lock globally locks all locking operations for the container. 293 func (s *StateFile) lock(tryLock TryLock) error { 294 s.once.Do(func() { 295 s.flock = flock.New(s.lockPath()) 296 }) 297 298 if tryLock { 299 gotLock, err := s.flock.TryLock() 300 if err != nil { 301 return fmt.Errorf("acquiring lock on %q: %v", s.flock, err) 302 } 303 if !gotLock { 304 return ErrStateFileLocked 305 } 306 } else { 307 if err := s.flock.Lock(); err != nil { 308 return fmt.Errorf("acquiring lock on %q: %v", s.flock, err) 309 } 310 } 311 return nil 312 } 313 314 // LockForNew acquires the lock and checks if the state file doesn't exist. This 315 // is done to ensure that more than one creation didn't race to create 316 // containers with the same ID. 317 func (s *StateFile) LockForNew() error { 318 if err := s.lock(BlockAcquire); err != nil { 319 return err 320 } 321 322 // Checks if the container already exists by looking for the metadata file. 323 if _, err := os.Stat(s.statePath()); err == nil { 324 s.UnlockOrDie() 325 return fmt.Errorf("container already exists") 326 } else if !os.IsNotExist(err) { 327 s.UnlockOrDie() 328 return fmt.Errorf("looking for existing container: %v", err) 329 } 330 return nil 331 } 332 333 // unlock globally unlocks all locking operations for the container. 334 func (s *StateFile) unlock() error { 335 if !s.flock.Locked() { 336 panic("unlock called without lock held") 337 } 338 339 if err := s.flock.Unlock(); err != nil { 340 log.Warningf("Error to release lock on %q: %v", s.flock, err) 341 return fmt.Errorf("releasing lock on %q: %v", s.flock, err) 342 } 343 return nil 344 } 345 346 // UnlockOrDie is the same as unlock() but panics in case of failure. 347 func (s *StateFile) UnlockOrDie() { 348 if !s.flock.Locked() { 349 panic("unlock called without lock held") 350 } 351 if err := s.flock.Unlock(); err != nil { 352 panic(fmt.Sprintf("Error releasing lock on %q: %v", s.flock, err)) 353 } 354 } 355 356 // SaveLocked saves 'v' to the state file. 357 // 358 // Preconditions: lock(*) must been called before. 359 func (s *StateFile) SaveLocked(v any) error { 360 if !s.flock.Locked() { 361 panic("saveLocked called without lock held") 362 } 363 364 meta, err := json.Marshal(v) 365 if err != nil { 366 return err 367 } 368 if err := ioutil.WriteFile(s.statePath(), meta, 0640); err != nil { 369 return fmt.Errorf("writing json file: %v", err) 370 } 371 return nil 372 } 373 374 // Stat returns the result of calling stat() on the state file. 375 // Doing so does not require locking. 376 func (s *StateFile) Stat() (os.FileInfo, error) { 377 return os.Stat(s.statePath()) 378 } 379 380 func (s *StateFile) load(v any, opts LoadOpts) error { 381 if err := s.lock(opts.TryLock); err != nil { 382 return err 383 } 384 defer s.UnlockOrDie() 385 386 metaBytes, err := ioutil.ReadFile(s.statePath()) 387 if err != nil { 388 return err 389 } 390 return json.Unmarshal(metaBytes, &v) 391 } 392 393 func (s *StateFile) close() error { 394 if s.flock == nil { 395 return nil 396 } 397 if s.flock.Locked() { 398 panic("Closing locked file") 399 } 400 return s.flock.Close() 401 } 402 403 func buildPath(rootDir string, id FullID, extension string) string { 404 // Note: "_" and ":" are not valid in IDs. 405 name := fmt.Sprintf("%s_sandbox:%s.%s", id.ContainerID, id.SandboxID, extension) 406 return filepath.Join(rootDir, name) 407 } 408 409 // statePath is the full path to the state file. 410 func (s *StateFile) statePath() string { 411 return buildPath(s.RootDir, s.ID, stateFileExtension) 412 } 413 414 // lockPath is the full path to the lock file. 415 func (s *StateFile) lockPath() string { 416 return buildPath(s.RootDir, s.ID, "lock") 417 } 418 419 // Destroy deletes all state created by the stateFile. It may be called with the 420 // lock file held. In that case, the lock file must still be unlocked and 421 // properly closed after destroy returns. 422 func (s *StateFile) Destroy() error { 423 if err := os.Remove(s.statePath()); err != nil && !os.IsNotExist(err) { 424 return err 425 } 426 if err := os.Remove(s.lockPath()); err != nil && !os.IsNotExist(err) { 427 return err 428 } 429 return nil 430 }