github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/runsc/container/state_file.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package container
    16  
    17  import (
    18  	"encoding/json"
    19  	"errors"
    20  	"fmt"
    21  	"io/ioutil"
    22  	"os"
    23  	"path/filepath"
    24  	"regexp"
    25  	"strings"
    26  
    27  	"github.com/gofrs/flock"
    28  	"golang.org/x/sys/unix"
    29  	"github.com/metacubex/gvisor/pkg/log"
    30  	"github.com/metacubex/gvisor/pkg/sync"
    31  )
    32  
// stateFileExtension is the file-name extension used for container state
// files. statePath passes it to buildPath, which appends it after a ".".
const stateFileExtension = "state"
    34  
// ErrStateFileLocked is the error produced by StateFile.lock when a
// non-blocking (TryAcquire) attempt does not obtain the lock. Load() surfaces
// this condition to callers that set LoadOpts.TryLock.
var ErrStateFileLocked = errors.New("state file locked")
    38  
    39  // TryLock represents whether we should block waiting for the lock to be acquired or not.
    40  type TryLock bool
    41  
    42  const (
    43  	// BlockAcquire means we will block until the lock can be acquired.
    44  	BlockAcquire TryLock = false
    45  
    46  	// TryAcquire means we will fail fast if the lock cannot be acquired.
    47  	TryAcquire TryLock = true
    48  )
    49  
// LoadOpts provides options for Load()ing a container.
type LoadOpts struct {
	// Exact tells whether the search should be exact. When false, Load()
	// treats the container ID as a prefix and resolves it to a full ID first.
	Exact bool

	// SkipCheck tells Load() to skip checking if the container is running.
	SkipCheck bool

	// TryLock tells Load() to fail if the container state file cannot be locked,
	// as opposed to blocking until it is available.
	// When the state file cannot be locked, it will error with ErrStateFileLocked.
	TryLock TryLock

	// RootContainer when true matches the search only with the root container of
	// a sandbox. This is used when looking for a sandbox given that root
	// container and sandbox share the same ID.
	RootContainer bool
}
    68  
    69  // Load loads a container with the given id from a metadata file. "id" may
    70  // be an abbreviation of the full container id in case LoadOpts.Exact if not
    71  // set. It also checks if the container is still running, in order to return
    72  // an error to the caller earlier. This check is skipped if LoadOpts.SkipCheck
    73  // is set.
    74  //
    75  // Returns ErrNotExist if no container is found. Returns error in case more than
    76  // one containers matching the ID prefix is found.
    77  func Load(rootDir string, id FullID, opts LoadOpts) (*Container, error) {
    78  	log.Debugf("Load container, rootDir: %q, id: %+v, opts: %+v", rootDir, id, opts)
    79  	if !opts.Exact {
    80  		var err error
    81  		id, err = findContainerID(rootDir, id.ContainerID)
    82  		if err != nil {
    83  			// Preserve error so that callers can distinguish 'not found' errors.
    84  			return nil, err
    85  		}
    86  	}
    87  
    88  	if err := id.validate(); err != nil {
    89  		return nil, fmt.Errorf("invalid container id: %v", err)
    90  	}
    91  	state := StateFile{
    92  		RootDir: rootDir,
    93  		ID:      id,
    94  	}
    95  	defer state.close()
    96  
    97  	c := &Container{}
    98  	if err := state.load(c, opts); err != nil {
    99  		if os.IsNotExist(err) {
   100  			// Preserve error so that callers can distinguish 'not found' errors.
   101  			return nil, err
   102  		}
   103  		return nil, fmt.Errorf("reading container metadata file %q: %v", state.statePath(), err)
   104  	}
   105  
   106  	if opts.RootContainer && c.ID != c.Sandbox.ID {
   107  		return nil, fmt.Errorf("ID %q doesn't belong to a sandbox", id)
   108  	}
   109  
   110  	if !opts.SkipCheck {
   111  		// If the status is "Running" or "Created", check that the sandbox/container
   112  		// is still running, setting it to Stopped if not.
   113  		//
   114  		// This is inherently racy.
   115  		switch c.Status {
   116  		case Created:
   117  			if !c.IsSandboxRunning() {
   118  				// Sandbox no longer exists, so this container definitely does not exist.
   119  				log.Warningf("Process for sandbox %v is no longer running; assuming container is in stopped state", c.Sandbox.ID)
   120  				c.changeStatus(Stopped)
   121  			}
   122  		case Running:
   123  			if err := c.SignalContainer(unix.Signal(0), false); err != nil {
   124  				log.Warningf("Cannot signal container %v for sandbox %v (err: %v); assuming container is in stopped state", c.ID, c.Sandbox.ID, err)
   125  				c.changeStatus(Stopped)
   126  			}
   127  		}
   128  	}
   129  
   130  	return c, nil
   131  }
   132  
   133  // List returns all container ids in the given root directory.
   134  func List(rootDir string) ([]FullID, error) {
   135  	log.Debugf("List containers %q", rootDir)
   136  	return listMatch(rootDir, FullID{})
   137  }
   138  
   139  // ListSandboxes returns all sandbox ids in the given root directory.
   140  func ListSandboxes(rootDir string) ([]FullID, error) {
   141  	log.Debugf("List containers %q", rootDir)
   142  	ids, err := List(rootDir)
   143  	if err != nil {
   144  		return nil, err
   145  	}
   146  
   147  	sandboxes := make(map[string]struct{}, len(ids))
   148  	for _, id := range ids {
   149  		sandboxes[id.SandboxID] = struct{}{}
   150  	}
   151  	// Reset ids to list only sandboxes.
   152  	ids = nil
   153  	for id := range sandboxes {
   154  		ids = append(ids, FullID{SandboxID: id, ContainerID: id})
   155  	}
   156  	return ids, nil
   157  }
   158  
   159  // listMatch returns all container ids that match the provided id.
   160  func listMatch(rootDir string, id FullID) ([]FullID, error) {
   161  	id.SandboxID += "*"
   162  	id.ContainerID += "*"
   163  	pattern := buildPath(rootDir, id, stateFileExtension)
   164  	list, err := filepath.Glob(pattern)
   165  	if err != nil {
   166  		return nil, err
   167  	}
   168  	var out []FullID
   169  	for _, path := range list {
   170  		id, err := parseFileName(filepath.Base(path))
   171  		if err == nil {
   172  			out = append(out, id)
   173  		}
   174  	}
   175  	return out, nil
   176  }
   177  
   178  // LoadSandbox loads all containers that belong to the sandbox with the given
   179  // ID.
   180  func LoadSandbox(rootDir, id string, opts LoadOpts) ([]*Container, error) {
   181  	cids, err := listMatch(rootDir, FullID{SandboxID: id})
   182  	if err != nil {
   183  		return nil, err
   184  	}
   185  
   186  	// Override load options that don't make sense in the context of this function.
   187  	opts.SkipCheck = true      // We're loading all containers irrespective of status.
   188  	opts.RootContainer = false // We're loading all containers, not just the root one.
   189  	opts.Exact = true          // We'll iterate over exact container IDs below.
   190  
   191  	// Load the container metadata.
   192  	var containers []*Container
   193  	for _, cid := range cids {
   194  		container, err := Load(rootDir, cid, opts)
   195  		if err != nil {
   196  			// Container file may not exist if it raced with creation/deletion or
   197  			// directory was left behind. Load provides a snapshot in time, so it's
   198  			// fine to skip it.
   199  			if os.IsNotExist(err) {
   200  				continue
   201  			}
   202  			return nil, fmt.Errorf("loading sandbox %q, failed to load container %q: %v", id, cid, err)
   203  		}
   204  		containers = append(containers, container)
   205  	}
   206  	return containers, nil
   207  }
   208  
   209  func findContainerID(rootDir, partialID string) (FullID, error) {
   210  	// Check whether the id fully specifies an existing container.
   211  	pattern := buildPath(rootDir, FullID{SandboxID: "*", ContainerID: partialID + "*"}, stateFileExtension)
   212  	list, err := filepath.Glob(pattern)
   213  	if err != nil {
   214  		return FullID{}, err
   215  	}
   216  	switch len(list) {
   217  	case 0:
   218  		return FullID{}, os.ErrNotExist
   219  	case 1:
   220  		return parseFileName(filepath.Base(list[0]))
   221  	}
   222  
   223  	// Now see whether id could be an abbreviation of exactly 1 of the
   224  	// container ids. If id is ambiguous (it could match more than 1
   225  	// container), it is an error.
   226  	ids, err := List(rootDir)
   227  	if err != nil {
   228  		return FullID{}, err
   229  	}
   230  	var rv *FullID
   231  	for _, id := range ids {
   232  		if strings.HasPrefix(id.ContainerID, partialID) {
   233  			if rv != nil {
   234  				return FullID{}, fmt.Errorf("id %q is ambiguous and could refer to multiple containers: %q, %q", partialID, rv, id)
   235  			}
   236  			rv = &id
   237  		}
   238  	}
   239  	if rv == nil {
   240  		return FullID{}, os.ErrNotExist
   241  	}
   242  	log.Debugf("abbreviated id %q resolves to full id %v", partialID, *rv)
   243  	return *rv, nil
   244  }
   245  
   246  func parseFileName(name string) (FullID, error) {
   247  	re := regexp.MustCompile(`([\w+-\.]+)_sandbox:([\w+-\.]+)\.` + stateFileExtension)
   248  	groups := re.FindStringSubmatch(name)
   249  	if len(groups) != 3 {
   250  		return FullID{}, fmt.Errorf("invalid state file name format: %q", name)
   251  	}
   252  	id := FullID{
   253  		SandboxID:   groups[2],
   254  		ContainerID: groups[1],
   255  	}
   256  	if err := id.validate(); err != nil {
   257  		return FullID{}, fmt.Errorf("invalid state file name %q: %w", name, err)
   258  	}
   259  	return id, nil
   260  }
   261  
// FullID combines sandbox and container ID to identify a container. Sandbox ID
// is used to allow all containers for a given sandbox to be loaded by matching
// sandbox ID in the file name.
type FullID struct {
	// SandboxID is the ID of the sandbox the container belongs to.
	SandboxID string `json:"sandboxId"`
	// ContainerID is the ID of the container itself.
	ContainerID string `json:"containerId"`
}
   269  
   270  func (f *FullID) String() string {
   271  	return f.SandboxID + "/" + f.ContainerID
   272  }
   273  
   274  func (f *FullID) validate() error {
   275  	if err := validateID(f.SandboxID); err != nil {
   276  		return err
   277  	}
   278  	return validateID(f.ContainerID)
   279  }
   280  
// StateFile handles load from/save to container state safely from multiple
// processes. It uses a lock file to provide synchronization between operations.
//
// The lock file is located at: "${s.RootDir}/${container-id}_sandbox:${sandbox-id}.lock".
// The state file is located at: "${s.RootDir}/${container-id}_sandbox:${sandbox-id}.state".
type StateFile struct {
	// RootDir is the directory containing the container metadata file.
	RootDir string `json:"rootDir"`

	// ID is the sandbox+container ID.
	ID FullID `json:"id"`

	//
	// Fields below this line are not saved in the state file and will not
	// be preserved across commands.
	//

	// once guards the one-time creation of flock in lock().
	once sync.Once `nojson:"true"`
	// flock is the handle on the lock file; it is nil until lock() has been
	// called for the first time.
	flock *flock.Flock `nojson:"true"`
}
   301  
   302  // lock globally locks all locking operations for the container.
   303  func (s *StateFile) lock(tryLock TryLock) error {
   304  	s.once.Do(func() {
   305  		s.flock = flock.New(s.lockPath())
   306  	})
   307  
   308  	if tryLock {
   309  		gotLock, err := s.flock.TryLock()
   310  		if err != nil {
   311  			return fmt.Errorf("acquiring lock on %q: %v", s.flock, err)
   312  		}
   313  		if !gotLock {
   314  			return ErrStateFileLocked
   315  		}
   316  	} else {
   317  		if err := s.flock.Lock(); err != nil {
   318  			return fmt.Errorf("acquiring lock on %q: %v", s.flock, err)
   319  		}
   320  	}
   321  	return nil
   322  }
   323  
   324  // LockForNew acquires the lock and checks if the state file doesn't exist. This
   325  // is done to ensure that more than one creation didn't race to create
   326  // containers with the same ID.
   327  func (s *StateFile) LockForNew() error {
   328  	if err := s.lock(BlockAcquire); err != nil {
   329  		return err
   330  	}
   331  
   332  	// Checks if the container already exists by looking for the metadata file.
   333  	if _, err := os.Stat(s.statePath()); err == nil {
   334  		s.UnlockOrDie()
   335  		return fmt.Errorf("container already exists")
   336  	} else if !os.IsNotExist(err) {
   337  		s.UnlockOrDie()
   338  		return fmt.Errorf("looking for existing container: %v", err)
   339  	}
   340  	return nil
   341  }
   342  
   343  // unlock globally unlocks all locking operations for the container.
   344  func (s *StateFile) unlock() error {
   345  	if !s.flock.Locked() {
   346  		panic("unlock called without lock held")
   347  	}
   348  
   349  	if err := s.flock.Unlock(); err != nil {
   350  		log.Warningf("Error to release lock on %q: %v", s.flock, err)
   351  		return fmt.Errorf("releasing lock on %q: %v", s.flock, err)
   352  	}
   353  	return nil
   354  }
   355  
   356  // UnlockOrDie is the same as unlock() but panics in case of failure.
   357  func (s *StateFile) UnlockOrDie() {
   358  	if !s.flock.Locked() {
   359  		panic("unlock called without lock held")
   360  	}
   361  	if err := s.flock.Unlock(); err != nil {
   362  		panic(fmt.Sprintf("Error releasing lock on %q: %v", s.flock, err))
   363  	}
   364  }
   365  
   366  // SaveLocked saves 'v' to the state file.
   367  //
   368  // Preconditions: lock(*) must been called before.
   369  func (s *StateFile) SaveLocked(v any) error {
   370  	if !s.flock.Locked() {
   371  		panic("saveLocked called without lock held")
   372  	}
   373  
   374  	meta, err := json.Marshal(v)
   375  	if err != nil {
   376  		return err
   377  	}
   378  	if err := ioutil.WriteFile(s.statePath(), meta, 0640); err != nil {
   379  		return fmt.Errorf("writing json file: %v", err)
   380  	}
   381  	return nil
   382  }
   383  
   384  // Stat returns the result of calling stat() on the state file.
   385  // Doing so does not require locking.
   386  func (s *StateFile) Stat() (os.FileInfo, error) {
   387  	return os.Stat(s.statePath())
   388  }
   389  
   390  func (s *StateFile) load(v any, opts LoadOpts) error {
   391  	if err := s.lock(opts.TryLock); err != nil {
   392  		return err
   393  	}
   394  	defer s.UnlockOrDie()
   395  
   396  	metaBytes, err := ioutil.ReadFile(s.statePath())
   397  	if err != nil {
   398  		return err
   399  	}
   400  	return json.Unmarshal(metaBytes, &v)
   401  }
   402  
   403  func (s *StateFile) close() error {
   404  	if s.flock == nil {
   405  		return nil
   406  	}
   407  	if s.flock.Locked() {
   408  		panic("Closing locked file")
   409  	}
   410  	return s.flock.Close()
   411  }
   412  
   413  func buildPath(rootDir string, id FullID, extension string) string {
   414  	// Note: "_" and ":" are not valid in IDs.
   415  	name := fmt.Sprintf("%s_sandbox:%s.%s", id.ContainerID, id.SandboxID, extension)
   416  	return filepath.Join(rootDir, name)
   417  }
   418  
   419  // statePath is the full path to the state file.
   420  func (s *StateFile) statePath() string {
   421  	return buildPath(s.RootDir, s.ID, stateFileExtension)
   422  }
   423  
   424  // lockPath is the full path to the lock file.
   425  func (s *StateFile) lockPath() string {
   426  	return buildPath(s.RootDir, s.ID, "lock")
   427  }
   428  
   429  // Destroy deletes all state created by the stateFile. It may be called with the
   430  // lock file held. In that case, the lock file must still be unlocked and
   431  // properly closed after destroy returns.
   432  func (s *StateFile) Destroy() error {
   433  	if err := os.Remove(s.statePath()); err != nil && !os.IsNotExist(err) {
   434  		return err
   435  	}
   436  	if err := os.Remove(s.lockPath()); err != nil && !os.IsNotExist(err) {
   437  		return err
   438  	}
   439  	return nil
   440  }