github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/runsc/container/state_file.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package container 16 17 import ( 18 "encoding/json" 19 "errors" 20 "fmt" 21 "io/ioutil" 22 "os" 23 "path/filepath" 24 "regexp" 25 "strings" 26 27 "github.com/gofrs/flock" 28 "golang.org/x/sys/unix" 29 "github.com/metacubex/gvisor/pkg/log" 30 "github.com/metacubex/gvisor/pkg/sync" 31 ) 32 33 const stateFileExtension = "state" 34 35 // ErrStateFileLocked is returned by Load() when the state file is locked 36 // and TryLock is enabled. 37 var ErrStateFileLocked = errors.New("state file locked") 38 39 // TryLock represents whether we should block waiting for the lock to be acquired or not. 40 type TryLock bool 41 42 const ( 43 // BlockAcquire means we will block until the lock can be acquired. 44 BlockAcquire TryLock = false 45 46 // TryAcquire means we will fail fast if the lock cannot be acquired. 47 TryAcquire TryLock = true 48 ) 49 50 // LoadOpts provides options for Load()ing a container. 51 type LoadOpts struct { 52 // Exact tells whether the search should be exact. See Load() for more. 53 Exact bool 54 55 // SkipCheck tells Load() to skip checking if container is runnning. 56 SkipCheck bool 57 58 // TryLock tells Load() to fail if the container state file cannot be locked, 59 // as opposed to blocking until it is available. 60 // When the state file cannot be locked, it will error with ErrStateFileLocked. 61 TryLock TryLock 62 63 // RootContainer when true matches the search only with the root container of 64 // a sandbox. This is used when looking for a sandbox given that root 65 // container and sandbox share the same ID. 66 RootContainer bool 67 } 68 69 // Load loads a container with the given id from a metadata file. "id" may 70 // be an abbreviation of the full container id in case LoadOpts.Exact if not 71 // set. It also checks if the container is still running, in order to return 72 // an error to the caller earlier. This check is skipped if LoadOpts.SkipCheck 73 // is set. 74 // 75 // Returns ErrNotExist if no container is found. Returns error in case more than 76 // one containers matching the ID prefix is found. 77 func Load(rootDir string, id FullID, opts LoadOpts) (*Container, error) { 78 log.Debugf("Load container, rootDir: %q, id: %+v, opts: %+v", rootDir, id, opts) 79 if !opts.Exact { 80 var err error 81 id, err = findContainerID(rootDir, id.ContainerID) 82 if err != nil { 83 // Preserve error so that callers can distinguish 'not found' errors. 84 return nil, err 85 } 86 } 87 88 if err := id.validate(); err != nil { 89 return nil, fmt.Errorf("invalid container id: %v", err) 90 } 91 state := StateFile{ 92 RootDir: rootDir, 93 ID: id, 94 } 95 defer state.close() 96 97 c := &Container{} 98 if err := state.load(c, opts); err != nil { 99 if os.IsNotExist(err) { 100 // Preserve error so that callers can distinguish 'not found' errors. 101 return nil, err 102 } 103 return nil, fmt.Errorf("reading container metadata file %q: %v", state.statePath(), err) 104 } 105 106 if opts.RootContainer && c.ID != c.Sandbox.ID { 107 return nil, fmt.Errorf("ID %q doesn't belong to a sandbox", id) 108 } 109 110 if !opts.SkipCheck { 111 // If the status is "Running" or "Created", check that the sandbox/container 112 // is still running, setting it to Stopped if not. 113 // 114 // This is inherently racy. 115 switch c.Status { 116 case Created: 117 if !c.IsSandboxRunning() { 118 // Sandbox no longer exists, so this container definitely does not exist. 119 log.Warningf("Process for sandbox %v is no longer running; assuming container is in stopped state", c.Sandbox.ID) 120 c.changeStatus(Stopped) 121 } 122 case Running: 123 if err := c.SignalContainer(unix.Signal(0), false); err != nil { 124 log.Warningf("Cannot signal container %v for sandbox %v (err: %v); assuming container is in stopped state", c.ID, c.Sandbox.ID, err) 125 c.changeStatus(Stopped) 126 } 127 } 128 } 129 130 return c, nil 131 } 132 133 // List returns all container ids in the given root directory. 134 func List(rootDir string) ([]FullID, error) { 135 log.Debugf("List containers %q", rootDir) 136 return listMatch(rootDir, FullID{}) 137 } 138 139 // ListSandboxes returns all sandbox ids in the given root directory. 140 func ListSandboxes(rootDir string) ([]FullID, error) { 141 log.Debugf("List containers %q", rootDir) 142 ids, err := List(rootDir) 143 if err != nil { 144 return nil, err 145 } 146 147 sandboxes := make(map[string]struct{}, len(ids)) 148 for _, id := range ids { 149 sandboxes[id.SandboxID] = struct{}{} 150 } 151 // Reset ids to list only sandboxes. 152 ids = nil 153 for id := range sandboxes { 154 ids = append(ids, FullID{SandboxID: id, ContainerID: id}) 155 } 156 return ids, nil 157 } 158 159 // listMatch returns all container ids that match the provided id. 160 func listMatch(rootDir string, id FullID) ([]FullID, error) { 161 id.SandboxID += "*" 162 id.ContainerID += "*" 163 pattern := buildPath(rootDir, id, stateFileExtension) 164 list, err := filepath.Glob(pattern) 165 if err != nil { 166 return nil, err 167 } 168 var out []FullID 169 for _, path := range list { 170 id, err := parseFileName(filepath.Base(path)) 171 if err == nil { 172 out = append(out, id) 173 } 174 } 175 return out, nil 176 } 177 178 // LoadSandbox loads all containers that belong to the sandbox with the given 179 // ID. 180 func LoadSandbox(rootDir, id string, opts LoadOpts) ([]*Container, error) { 181 cids, err := listMatch(rootDir, FullID{SandboxID: id}) 182 if err != nil { 183 return nil, err 184 } 185 186 // Override load options that don't make sense in the context of this function. 187 opts.SkipCheck = true // We're loading all containers irrespective of status. 188 opts.RootContainer = false // We're loading all containers, not just the root one. 189 opts.Exact = true // We'll iterate over exact container IDs below. 190 191 // Load the container metadata. 192 var containers []*Container 193 for _, cid := range cids { 194 container, err := Load(rootDir, cid, opts) 195 if err != nil { 196 // Container file may not exist if it raced with creation/deletion or 197 // directory was left behind. Load provides a snapshot in time, so it's 198 // fine to skip it. 199 if os.IsNotExist(err) { 200 continue 201 } 202 return nil, fmt.Errorf("loading sandbox %q, failed to load container %q: %v", id, cid, err) 203 } 204 containers = append(containers, container) 205 } 206 return containers, nil 207 } 208 209 func findContainerID(rootDir, partialID string) (FullID, error) { 210 // Check whether the id fully specifies an existing container. 211 pattern := buildPath(rootDir, FullID{SandboxID: "*", ContainerID: partialID + "*"}, stateFileExtension) 212 list, err := filepath.Glob(pattern) 213 if err != nil { 214 return FullID{}, err 215 } 216 switch len(list) { 217 case 0: 218 return FullID{}, os.ErrNotExist 219 case 1: 220 return parseFileName(filepath.Base(list[0])) 221 } 222 223 // Now see whether id could be an abbreviation of exactly 1 of the 224 // container ids. If id is ambiguous (it could match more than 1 225 // container), it is an error. 226 ids, err := List(rootDir) 227 if err != nil { 228 return FullID{}, err 229 } 230 var rv *FullID 231 for _, id := range ids { 232 if strings.HasPrefix(id.ContainerID, partialID) { 233 if rv != nil { 234 return FullID{}, fmt.Errorf("id %q is ambiguous and could refer to multiple containers: %q, %q", partialID, rv, id) 235 } 236 rv = &id 237 } 238 } 239 if rv == nil { 240 return FullID{}, os.ErrNotExist 241 } 242 log.Debugf("abbreviated id %q resolves to full id %v", partialID, *rv) 243 return *rv, nil 244 } 245 246 func parseFileName(name string) (FullID, error) { 247 re := regexp.MustCompile(`([\w+-\.]+)_sandbox:([\w+-\.]+)\.` + stateFileExtension) 248 groups := re.FindStringSubmatch(name) 249 if len(groups) != 3 { 250 return FullID{}, fmt.Errorf("invalid state file name format: %q", name) 251 } 252 id := FullID{ 253 SandboxID: groups[2], 254 ContainerID: groups[1], 255 } 256 if err := id.validate(); err != nil { 257 return FullID{}, fmt.Errorf("invalid state file name %q: %w", name, err) 258 } 259 return id, nil 260 } 261 262 // FullID combines sandbox and container ID to identify a container. Sandbox ID 263 // is used to allow all containers for a given sandbox to be loaded by matching 264 // sandbox ID in the file name. 265 type FullID struct { 266 SandboxID string `json:"sandboxId"` 267 ContainerID string `json:"containerId"` 268 } 269 270 func (f *FullID) String() string { 271 return f.SandboxID + "/" + f.ContainerID 272 } 273 274 func (f *FullID) validate() error { 275 if err := validateID(f.SandboxID); err != nil { 276 return err 277 } 278 return validateID(f.ContainerID) 279 } 280 281 // StateFile handles load from/save to container state safely from multiple 282 // processes. It uses a lock file to provide synchronization between operations. 283 // 284 // The lock file is located at: "${s.RootDir}/${containerd-id}_sand:{sandbox-id}.lock". 285 // The state file is located at: "${s.RootDir}/${containerd-id}_sand:{sandbox-id}.state". 286 type StateFile struct { 287 // RootDir is the directory containing the container metadata file. 288 RootDir string `json:"rootDir"` 289 290 // ID is the sandbox+container ID. 291 ID FullID `json:"id"` 292 293 // 294 // Fields below this line are not saved in the state file and will not 295 // be preserved across commands. 296 // 297 298 once sync.Once `nojson:"true"` 299 flock *flock.Flock `nojson:"true"` 300 } 301 302 // lock globally locks all locking operations for the container. 303 func (s *StateFile) lock(tryLock TryLock) error { 304 s.once.Do(func() { 305 s.flock = flock.New(s.lockPath()) 306 }) 307 308 if tryLock { 309 gotLock, err := s.flock.TryLock() 310 if err != nil { 311 return fmt.Errorf("acquiring lock on %q: %v", s.flock, err) 312 } 313 if !gotLock { 314 return ErrStateFileLocked 315 } 316 } else { 317 if err := s.flock.Lock(); err != nil { 318 return fmt.Errorf("acquiring lock on %q: %v", s.flock, err) 319 } 320 } 321 return nil 322 } 323 324 // LockForNew acquires the lock and checks if the state file doesn't exist. This 325 // is done to ensure that more than one creation didn't race to create 326 // containers with the same ID. 327 func (s *StateFile) LockForNew() error { 328 if err := s.lock(BlockAcquire); err != nil { 329 return err 330 } 331 332 // Checks if the container already exists by looking for the metadata file. 333 if _, err := os.Stat(s.statePath()); err == nil { 334 s.UnlockOrDie() 335 return fmt.Errorf("container already exists") 336 } else if !os.IsNotExist(err) { 337 s.UnlockOrDie() 338 return fmt.Errorf("looking for existing container: %v", err) 339 } 340 return nil 341 } 342 343 // unlock globally unlocks all locking operations for the container. 344 func (s *StateFile) unlock() error { 345 if !s.flock.Locked() { 346 panic("unlock called without lock held") 347 } 348 349 if err := s.flock.Unlock(); err != nil { 350 log.Warningf("Error to release lock on %q: %v", s.flock, err) 351 return fmt.Errorf("releasing lock on %q: %v", s.flock, err) 352 } 353 return nil 354 } 355 356 // UnlockOrDie is the same as unlock() but panics in case of failure. 357 func (s *StateFile) UnlockOrDie() { 358 if !s.flock.Locked() { 359 panic("unlock called without lock held") 360 } 361 if err := s.flock.Unlock(); err != nil { 362 panic(fmt.Sprintf("Error releasing lock on %q: %v", s.flock, err)) 363 } 364 } 365 366 // SaveLocked saves 'v' to the state file. 367 // 368 // Preconditions: lock(*) must been called before. 369 func (s *StateFile) SaveLocked(v any) error { 370 if !s.flock.Locked() { 371 panic("saveLocked called without lock held") 372 } 373 374 meta, err := json.Marshal(v) 375 if err != nil { 376 return err 377 } 378 if err := ioutil.WriteFile(s.statePath(), meta, 0640); err != nil { 379 return fmt.Errorf("writing json file: %v", err) 380 } 381 return nil 382 } 383 384 // Stat returns the result of calling stat() on the state file. 385 // Doing so does not require locking. 386 func (s *StateFile) Stat() (os.FileInfo, error) { 387 return os.Stat(s.statePath()) 388 } 389 390 func (s *StateFile) load(v any, opts LoadOpts) error { 391 if err := s.lock(opts.TryLock); err != nil { 392 return err 393 } 394 defer s.UnlockOrDie() 395 396 metaBytes, err := ioutil.ReadFile(s.statePath()) 397 if err != nil { 398 return err 399 } 400 return json.Unmarshal(metaBytes, &v) 401 } 402 403 func (s *StateFile) close() error { 404 if s.flock == nil { 405 return nil 406 } 407 if s.flock.Locked() { 408 panic("Closing locked file") 409 } 410 return s.flock.Close() 411 } 412 413 func buildPath(rootDir string, id FullID, extension string) string { 414 // Note: "_" and ":" are not valid in IDs. 415 name := fmt.Sprintf("%s_sandbox:%s.%s", id.ContainerID, id.SandboxID, extension) 416 return filepath.Join(rootDir, name) 417 } 418 419 // statePath is the full path to the state file. 420 func (s *StateFile) statePath() string { 421 return buildPath(s.RootDir, s.ID, stateFileExtension) 422 } 423 424 // lockPath is the full path to the lock file. 425 func (s *StateFile) lockPath() string { 426 return buildPath(s.RootDir, s.ID, "lock") 427 } 428 429 // Destroy deletes all state created by the stateFile. It may be called with the 430 // lock file held. In that case, the lock file must still be unlocked and 431 // properly closed after destroy returns. 432 func (s *StateFile) Destroy() error { 433 if err := os.Remove(s.statePath()); err != nil && !os.IsNotExist(err) { 434 return err 435 } 436 if err := os.Remove(s.lockPath()); err != nil && !os.IsNotExist(err) { 437 return err 438 } 439 return nil 440 }