gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/runsc/container/state_file.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package container
    16  
    17  import (
    18  	"encoding/json"
    19  	"errors"
    20  	"fmt"
    21  	"io/ioutil"
    22  	"os"
    23  	"path/filepath"
    24  	"regexp"
    25  	"strings"
    26  
    27  	"github.com/gofrs/flock"
    28  	"gvisor.dev/gvisor/pkg/log"
    29  	"gvisor.dev/gvisor/pkg/sync"
    30  )
    31  
// stateFileExtension is the file name extension (without the leading dot)
// given to container state files.
const stateFileExtension = "state"
    33  
// ErrStateFileLocked is the error produced when the state file lock is
// already held by someone else and the caller asked not to block waiting for
// it (TryAcquire).
var ErrStateFileLocked = errors.New("state file locked")
    37  
// TryLock selects how the state file lock is acquired: either block until it
// becomes available, or fail immediately if it is already held.
type TryLock bool

const (
	// BlockAcquire means we will block until the lock can be acquired.
	BlockAcquire TryLock = false

	// TryAcquire means we will fail fast if the lock cannot be acquired.
	TryAcquire TryLock = true
)
    48  
// LoadOpts provides options for Load()ing a container.
type LoadOpts struct {
	// Exact tells whether the search should be exact. When false, the
	// container ID passed to Load() is treated as a prefix of the full ID.
	Exact bool

	// SkipCheck tells Load() to skip checking if the container is running.
	SkipCheck bool

	// TryLock tells Load() to fail if the container state file cannot be locked,
	// as opposed to blocking until it is available.
	// When the state file cannot be locked, it will error with ErrStateFileLocked.
	TryLock TryLock

	// RootContainer when true matches the search only with the root container of
	// a sandbox. This is used when looking for a sandbox given that root
	// container and sandbox share the same ID.
	RootContainer bool
}
    67  
    68  // Load loads a container with the given id from a metadata file. "id" may
    69  // be an abbreviation of the full container id in case LoadOpts.Exact if not
    70  // set. It also checks if the container is still running, in order to return
    71  // an error to the caller earlier. This check is skipped if LoadOpts.SkipCheck
    72  // is set.
    73  //
    74  // Returns ErrNotExist if no container is found. Returns error in case more than
    75  // one containers matching the ID prefix is found.
    76  func Load(rootDir string, id FullID, opts LoadOpts) (*Container, error) {
    77  	log.Debugf("Load container, rootDir: %q, id: %+v, opts: %+v", rootDir, id, opts)
    78  	if !opts.Exact {
    79  		var err error
    80  		id, err = findContainerID(rootDir, id.ContainerID)
    81  		if err != nil {
    82  			// Preserve error so that callers can distinguish 'not found' errors.
    83  			return nil, err
    84  		}
    85  	}
    86  
    87  	if err := id.validate(); err != nil {
    88  		return nil, fmt.Errorf("invalid container id: %v", err)
    89  	}
    90  	state := StateFile{
    91  		RootDir: rootDir,
    92  		ID:      id,
    93  	}
    94  	defer state.close()
    95  
    96  	c := &Container{}
    97  	if err := state.load(c, opts); err != nil {
    98  		if os.IsNotExist(err) {
    99  			// Preserve error so that callers can distinguish 'not found' errors.
   100  			return nil, err
   101  		}
   102  		return nil, fmt.Errorf("reading container metadata file %q: %v", state.statePath(), err)
   103  	}
   104  
   105  	if opts.RootContainer && c.ID != c.Sandbox.ID {
   106  		return nil, fmt.Errorf("ID %q doesn't belong to a sandbox", id)
   107  	}
   108  
   109  	if !opts.SkipCheck {
   110  		// If the status is "Running" or "Created", check that the sandbox/container
   111  		// is still running, setting it to Stopped if not.
   112  		//
   113  		// This is inherently racy.
   114  		switch c.Status {
   115  		case Created, Running:
   116  			c.CheckStopped()
   117  		}
   118  	}
   119  
   120  	return c, nil
   121  }
   122  
   123  // List returns all container ids in the given root directory.
   124  func List(rootDir string) ([]FullID, error) {
   125  	log.Debugf("List containers %q", rootDir)
   126  	return listMatch(rootDir, FullID{})
   127  }
   128  
   129  // ListSandboxes returns all sandbox ids in the given root directory.
   130  func ListSandboxes(rootDir string) ([]FullID, error) {
   131  	log.Debugf("List containers %q", rootDir)
   132  	ids, err := List(rootDir)
   133  	if err != nil {
   134  		return nil, err
   135  	}
   136  
   137  	sandboxes := make(map[string]struct{}, len(ids))
   138  	for _, id := range ids {
   139  		sandboxes[id.SandboxID] = struct{}{}
   140  	}
   141  	// Reset ids to list only sandboxes.
   142  	ids = nil
   143  	for id := range sandboxes {
   144  		ids = append(ids, FullID{SandboxID: id, ContainerID: id})
   145  	}
   146  	return ids, nil
   147  }
   148  
   149  // listMatch returns all container ids that match the provided id.
   150  func listMatch(rootDir string, id FullID) ([]FullID, error) {
   151  	id.SandboxID += "*"
   152  	id.ContainerID += "*"
   153  	pattern := buildPath(rootDir, id, stateFileExtension)
   154  	list, err := filepath.Glob(pattern)
   155  	if err != nil {
   156  		return nil, err
   157  	}
   158  	var out []FullID
   159  	for _, path := range list {
   160  		id, err := parseFileName(filepath.Base(path))
   161  		if err == nil {
   162  			out = append(out, id)
   163  		}
   164  	}
   165  	return out, nil
   166  }
   167  
   168  // LoadSandbox loads all containers that belong to the sandbox with the given
   169  // ID.
   170  func LoadSandbox(rootDir, id string, opts LoadOpts) ([]*Container, error) {
   171  	cids, err := listMatch(rootDir, FullID{SandboxID: id})
   172  	if err != nil {
   173  		return nil, err
   174  	}
   175  
   176  	// Override load options that don't make sense in the context of this function.
   177  	opts.SkipCheck = true      // We're loading all containers irrespective of status.
   178  	opts.RootContainer = false // We're loading all containers, not just the root one.
   179  	opts.Exact = true          // We'll iterate over exact container IDs below.
   180  
   181  	// Load the container metadata.
   182  	var containers []*Container
   183  	for _, cid := range cids {
   184  		container, err := Load(rootDir, cid, opts)
   185  		if err != nil {
   186  			// Container file may not exist if it raced with creation/deletion or
   187  			// directory was left behind. Load provides a snapshot in time, so it's
   188  			// fine to skip it.
   189  			if os.IsNotExist(err) {
   190  				continue
   191  			}
   192  			return nil, fmt.Errorf("loading sandbox %q, failed to load container %q: %v", id, cid, err)
   193  		}
   194  		containers = append(containers, container)
   195  	}
   196  	return containers, nil
   197  }
   198  
   199  func findContainerID(rootDir, partialID string) (FullID, error) {
   200  	// Check whether the id fully specifies an existing container.
   201  	pattern := buildPath(rootDir, FullID{SandboxID: "*", ContainerID: partialID + "*"}, stateFileExtension)
   202  	list, err := filepath.Glob(pattern)
   203  	if err != nil {
   204  		return FullID{}, err
   205  	}
   206  	switch len(list) {
   207  	case 0:
   208  		return FullID{}, os.ErrNotExist
   209  	case 1:
   210  		return parseFileName(filepath.Base(list[0]))
   211  	}
   212  
   213  	// Now see whether id could be an abbreviation of exactly 1 of the
   214  	// container ids. If id is ambiguous (it could match more than 1
   215  	// container), it is an error.
   216  	ids, err := List(rootDir)
   217  	if err != nil {
   218  		return FullID{}, err
   219  	}
   220  	var rv *FullID
   221  	for _, id := range ids {
   222  		if strings.HasPrefix(id.ContainerID, partialID) {
   223  			if rv != nil {
   224  				return FullID{}, fmt.Errorf("id %q is ambiguous and could refer to multiple containers: %q, %q", partialID, rv, id)
   225  			}
   226  			rv = &id
   227  		}
   228  	}
   229  	if rv == nil {
   230  		return FullID{}, os.ErrNotExist
   231  	}
   232  	log.Debugf("abbreviated id %q resolves to full id %v", partialID, *rv)
   233  	return *rv, nil
   234  }
   235  
   236  func parseFileName(name string) (FullID, error) {
   237  	re := regexp.MustCompile(`([\w+-\.]+)_sandbox:([\w+-\.]+)\.` + stateFileExtension)
   238  	groups := re.FindStringSubmatch(name)
   239  	if len(groups) != 3 {
   240  		return FullID{}, fmt.Errorf("invalid state file name format: %q", name)
   241  	}
   242  	id := FullID{
   243  		SandboxID:   groups[2],
   244  		ContainerID: groups[1],
   245  	}
   246  	if err := id.validate(); err != nil {
   247  		return FullID{}, fmt.Errorf("invalid state file name %q: %w", name, err)
   248  	}
   249  	return id, nil
   250  }
   251  
   252  // FullID combines sandbox and container ID to identify a container. Sandbox ID
   253  // is used to allow all containers for a given sandbox to be loaded by matching
   254  // sandbox ID in the file name.
   255  type FullID struct {
   256  	SandboxID   string `json:"sandboxId"`
   257  	ContainerID string `json:"containerId"`
   258  }
   259  
   260  func (f *FullID) String() string {
   261  	return f.SandboxID + "/" + f.ContainerID
   262  }
   263  
   264  func (f *FullID) validate() error {
   265  	if err := validateID(f.SandboxID); err != nil {
   266  		return err
   267  	}
   268  	return validateID(f.ContainerID)
   269  }
   270  
// StateFile handles load from/save to container state safely from multiple
// processes. It uses a lock file to provide synchronization between operations.
//
// The lock file is located at: "${s.RootDir}/${container-id}_sandbox:${sandbox-id}.lock".
// The state file is located at: "${s.RootDir}/${container-id}_sandbox:${sandbox-id}.state".
type StateFile struct {
	// RootDir is the directory containing the container metadata file.
	RootDir string `json:"rootDir"`

	// ID is the sandbox+container ID.
	ID FullID `json:"id"`

	//
	// Fields below this line are not saved in the state file and will not
	// be preserved across commands.
	//

	// once guards the one-time creation of flock in lock().
	once  sync.Once    `nojson:"true"`
	// flock is the file lock on the lock file. It stays nil until lock() is
	// called for the first time.
	flock *flock.Flock `nojson:"true"`
}
   291  
   292  // lock globally locks all locking operations for the container.
   293  func (s *StateFile) lock(tryLock TryLock) error {
   294  	s.once.Do(func() {
   295  		s.flock = flock.New(s.lockPath())
   296  	})
   297  
   298  	if tryLock {
   299  		gotLock, err := s.flock.TryLock()
   300  		if err != nil {
   301  			return fmt.Errorf("acquiring lock on %q: %v", s.flock, err)
   302  		}
   303  		if !gotLock {
   304  			return ErrStateFileLocked
   305  		}
   306  	} else {
   307  		if err := s.flock.Lock(); err != nil {
   308  			return fmt.Errorf("acquiring lock on %q: %v", s.flock, err)
   309  		}
   310  	}
   311  	return nil
   312  }
   313  
   314  // LockForNew acquires the lock and checks if the state file doesn't exist. This
   315  // is done to ensure that more than one creation didn't race to create
   316  // containers with the same ID.
   317  func (s *StateFile) LockForNew() error {
   318  	if err := s.lock(BlockAcquire); err != nil {
   319  		return err
   320  	}
   321  
   322  	// Checks if the container already exists by looking for the metadata file.
   323  	if _, err := os.Stat(s.statePath()); err == nil {
   324  		s.UnlockOrDie()
   325  		return fmt.Errorf("container already exists")
   326  	} else if !os.IsNotExist(err) {
   327  		s.UnlockOrDie()
   328  		return fmt.Errorf("looking for existing container: %v", err)
   329  	}
   330  	return nil
   331  }
   332  
   333  // unlock globally unlocks all locking operations for the container.
   334  func (s *StateFile) unlock() error {
   335  	if !s.flock.Locked() {
   336  		panic("unlock called without lock held")
   337  	}
   338  
   339  	if err := s.flock.Unlock(); err != nil {
   340  		log.Warningf("Error to release lock on %q: %v", s.flock, err)
   341  		return fmt.Errorf("releasing lock on %q: %v", s.flock, err)
   342  	}
   343  	return nil
   344  }
   345  
   346  // UnlockOrDie is the same as unlock() but panics in case of failure.
   347  func (s *StateFile) UnlockOrDie() {
   348  	if !s.flock.Locked() {
   349  		panic("unlock called without lock held")
   350  	}
   351  	if err := s.flock.Unlock(); err != nil {
   352  		panic(fmt.Sprintf("Error releasing lock on %q: %v", s.flock, err))
   353  	}
   354  }
   355  
   356  // SaveLocked saves 'v' to the state file.
   357  //
   358  // Preconditions: lock(*) must been called before.
   359  func (s *StateFile) SaveLocked(v any) error {
   360  	if !s.flock.Locked() {
   361  		panic("saveLocked called without lock held")
   362  	}
   363  
   364  	meta, err := json.Marshal(v)
   365  	if err != nil {
   366  		return err
   367  	}
   368  	if err := ioutil.WriteFile(s.statePath(), meta, 0640); err != nil {
   369  		return fmt.Errorf("writing json file: %v", err)
   370  	}
   371  	return nil
   372  }
   373  
   374  // Stat returns the result of calling stat() on the state file.
   375  // Doing so does not require locking.
   376  func (s *StateFile) Stat() (os.FileInfo, error) {
   377  	return os.Stat(s.statePath())
   378  }
   379  
   380  func (s *StateFile) load(v any, opts LoadOpts) error {
   381  	if err := s.lock(opts.TryLock); err != nil {
   382  		return err
   383  	}
   384  	defer s.UnlockOrDie()
   385  
   386  	metaBytes, err := ioutil.ReadFile(s.statePath())
   387  	if err != nil {
   388  		return err
   389  	}
   390  	return json.Unmarshal(metaBytes, &v)
   391  }
   392  
   393  func (s *StateFile) close() error {
   394  	if s.flock == nil {
   395  		return nil
   396  	}
   397  	if s.flock.Locked() {
   398  		panic("Closing locked file")
   399  	}
   400  	return s.flock.Close()
   401  }
   402  
   403  func buildPath(rootDir string, id FullID, extension string) string {
   404  	// Note: "_" and ":" are not valid in IDs.
   405  	name := fmt.Sprintf("%s_sandbox:%s.%s", id.ContainerID, id.SandboxID, extension)
   406  	return filepath.Join(rootDir, name)
   407  }
   408  
   409  // statePath is the full path to the state file.
   410  func (s *StateFile) statePath() string {
   411  	return buildPath(s.RootDir, s.ID, stateFileExtension)
   412  }
   413  
   414  // lockPath is the full path to the lock file.
   415  func (s *StateFile) lockPath() string {
   416  	return buildPath(s.RootDir, s.ID, "lock")
   417  }
   418  
   419  // Destroy deletes all state created by the stateFile. It may be called with the
   420  // lock file held. In that case, the lock file must still be unlocked and
   421  // properly closed after destroy returns.
   422  func (s *StateFile) Destroy() error {
   423  	if err := os.Remove(s.statePath()); err != nil && !os.IsNotExist(err) {
   424  		return err
   425  	}
   426  	if err := os.Remove(s.lockPath()); err != nil && !os.IsNotExist(err) {
   427  		return err
   428  	}
   429  	return nil
   430  }