github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/runsc/container/state_file.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package container
    16  
    17  import (
    18  	"encoding/json"
    19  	"errors"
    20  	"fmt"
    21  	"io/ioutil"
    22  	"os"
    23  	"path/filepath"
    24  	"regexp"
    25  	"strings"
    26  
    27  	"github.com/MerlinKodo/gvisor/pkg/log"
    28  	"github.com/MerlinKodo/gvisor/pkg/sync"
    29  	"github.com/gofrs/flock"
    30  	"golang.org/x/sys/unix"
    31  )
    32  
    33  const stateFileExtension = "state"
    34  
    35  // ErrStateFileLocked is returned by Load() when the state file is locked
    36  // and TryLock is enabled.
    37  var ErrStateFileLocked = errors.New("state file locked")
    38  
    39  // TryLock represents whether we should block waiting for the lock to be acquired or not.
    40  type TryLock bool
    41  
    42  const (
    43  	// BlockAcquire means we will block until the lock can be acquired.
    44  	BlockAcquire TryLock = false
    45  
    46  	// TryAcquire means we will fail fast if the lock cannot be acquired.
    47  	TryAcquire TryLock = true
    48  )
    49  
    50  // LoadOpts provides options for Load()ing a container.
    51  type LoadOpts struct {
    52  	// Exact tells whether the search should be exact. See Load() for more.
    53  	Exact bool
    54  
    55  	// SkipCheck tells Load() to skip checking if container is runnning.
    56  	SkipCheck bool
    57  
    58  	// TryLock tells Load() to fail if the container state file cannot be locked,
    59  	// as opposed to blocking until it is available.
    60  	// When the state file cannot be locked, it will error with ErrStateFileLocked.
    61  	TryLock TryLock
    62  
    63  	// RootContainer when true matches the search only with the root container of
    64  	// a sandbox. This is used when looking for a sandbox given that root
    65  	// container and sandbox share the same ID.
    66  	RootContainer bool
    67  }
    68  
    69  // Load loads a container with the given id from a metadata file. "id" may
    70  // be an abbreviation of the full container id in case LoadOpts.Exact if not
    71  // set. It also checks if the container is still running, in order to return
    72  // an error to the caller earlier. This check is skipped if LoadOpts.SkipCheck
    73  // is set.
    74  //
    75  // Returns ErrNotExist if no container is found. Returns error in case more than
    76  // one containers matching the ID prefix is found.
    77  func Load(rootDir string, id FullID, opts LoadOpts) (*Container, error) {
    78  	log.Debugf("Load container, rootDir: %q, id: %+v, opts: %+v", rootDir, id, opts)
    79  	if !opts.Exact {
    80  		var err error
    81  		id, err = findContainerID(rootDir, id.ContainerID)
    82  		if err != nil {
    83  			// Preserve error so that callers can distinguish 'not found' errors.
    84  			return nil, err
    85  		}
    86  	}
    87  
    88  	if err := id.validate(); err != nil {
    89  		return nil, fmt.Errorf("invalid container id: %v", err)
    90  	}
    91  	state := StateFile{
    92  		RootDir: rootDir,
    93  		ID:      id,
    94  	}
    95  	defer state.close()
    96  
    97  	c := &Container{}
    98  	if err := state.load(c, opts); err != nil {
    99  		if os.IsNotExist(err) {
   100  			// Preserve error so that callers can distinguish 'not found' errors.
   101  			return nil, err
   102  		}
   103  		return nil, fmt.Errorf("reading container metadata file %q: %v", state.statePath(), err)
   104  	}
   105  
   106  	if opts.RootContainer && c.ID != c.Sandbox.ID {
   107  		return nil, fmt.Errorf("ID %q doesn't belong to a sandbox", id)
   108  	}
   109  
   110  	if !opts.SkipCheck {
   111  		// If the status is "Running" or "Created", check that the sandbox/container
   112  		// is still running, setting it to Stopped if not.
   113  		//
   114  		// This is inherently racy.
   115  		switch c.Status {
   116  		case Created:
   117  			if !c.IsSandboxRunning() {
   118  				// Sandbox no longer exists, so this container definitely does not exist.
   119  				c.changeStatus(Stopped)
   120  			}
   121  		case Running:
   122  			if err := c.SignalContainer(unix.Signal(0), false); err != nil {
   123  				c.changeStatus(Stopped)
   124  			}
   125  		}
   126  	}
   127  
   128  	return c, nil
   129  }
   130  
   131  // List returns all container ids in the given root directory.
   132  func List(rootDir string) ([]FullID, error) {
   133  	log.Debugf("List containers %q", rootDir)
   134  	return listMatch(rootDir, FullID{})
   135  }
   136  
   137  // ListSandboxes returns all sandbox ids in the given root directory.
   138  func ListSandboxes(rootDir string) ([]FullID, error) {
   139  	log.Debugf("List containers %q", rootDir)
   140  	ids, err := List(rootDir)
   141  	if err != nil {
   142  		return nil, err
   143  	}
   144  
   145  	sandboxes := make(map[string]struct{}, len(ids))
   146  	for _, id := range ids {
   147  		sandboxes[id.SandboxID] = struct{}{}
   148  	}
   149  	// Reset ids to list only sandboxes.
   150  	ids = nil
   151  	for id := range sandboxes {
   152  		ids = append(ids, FullID{SandboxID: id, ContainerID: id})
   153  	}
   154  	return ids, nil
   155  }
   156  
   157  // listMatch returns all container ids that match the provided id.
   158  func listMatch(rootDir string, id FullID) ([]FullID, error) {
   159  	id.SandboxID += "*"
   160  	id.ContainerID += "*"
   161  	pattern := buildPath(rootDir, id, stateFileExtension)
   162  	list, err := filepath.Glob(pattern)
   163  	if err != nil {
   164  		return nil, err
   165  	}
   166  	var out []FullID
   167  	for _, path := range list {
   168  		id, err := parseFileName(filepath.Base(path))
   169  		if err == nil {
   170  			out = append(out, id)
   171  		}
   172  	}
   173  	return out, nil
   174  }
   175  
   176  // LoadSandbox loads all containers that belong to the sandbox with the given
   177  // ID.
   178  func LoadSandbox(rootDir, id string, opts LoadOpts) ([]*Container, error) {
   179  	cids, err := listMatch(rootDir, FullID{SandboxID: id})
   180  	if err != nil {
   181  		return nil, err
   182  	}
   183  
   184  	// Override load options that don't make sense in the context of this function.
   185  	opts.SkipCheck = true      // We're loading all containers irrespective of status.
   186  	opts.RootContainer = false // We're loading all containers, not just the root one.
   187  	opts.Exact = true          // We'll iterate over exact container IDs below.
   188  
   189  	// Load the container metadata.
   190  	var containers []*Container
   191  	for _, cid := range cids {
   192  		container, err := Load(rootDir, cid, opts)
   193  		if err != nil {
   194  			// Container file may not exist if it raced with creation/deletion or
   195  			// directory was left behind. Load provides a snapshot in time, so it's
   196  			// fine to skip it.
   197  			if os.IsNotExist(err) {
   198  				continue
   199  			}
   200  			return nil, fmt.Errorf("loading sandbox %q, failed to load container %q: %v", id, cid, err)
   201  		}
   202  		containers = append(containers, container)
   203  	}
   204  	return containers, nil
   205  }
   206  
   207  func findContainerID(rootDir, partialID string) (FullID, error) {
   208  	// Check whether the id fully specifies an existing container.
   209  	pattern := buildPath(rootDir, FullID{SandboxID: "*", ContainerID: partialID + "*"}, stateFileExtension)
   210  	list, err := filepath.Glob(pattern)
   211  	if err != nil {
   212  		return FullID{}, err
   213  	}
   214  	switch len(list) {
   215  	case 0:
   216  		return FullID{}, os.ErrNotExist
   217  	case 1:
   218  		return parseFileName(filepath.Base(list[0]))
   219  	}
   220  
   221  	// Now see whether id could be an abbreviation of exactly 1 of the
   222  	// container ids. If id is ambiguous (it could match more than 1
   223  	// container), it is an error.
   224  	ids, err := List(rootDir)
   225  	if err != nil {
   226  		return FullID{}, err
   227  	}
   228  	var rv *FullID
   229  	for _, id := range ids {
   230  		if strings.HasPrefix(id.ContainerID, partialID) {
   231  			if rv != nil {
   232  				return FullID{}, fmt.Errorf("id %q is ambiguous and could refer to multiple containers: %q, %q", partialID, rv, id)
   233  			}
   234  			rv = &id
   235  		}
   236  	}
   237  	if rv == nil {
   238  		return FullID{}, os.ErrNotExist
   239  	}
   240  	log.Debugf("abbreviated id %q resolves to full id %v", partialID, *rv)
   241  	return *rv, nil
   242  }
   243  
   244  func parseFileName(name string) (FullID, error) {
   245  	re := regexp.MustCompile(`([\w+-\.]+)_sandbox:([\w+-\.]+)\.` + stateFileExtension)
   246  	groups := re.FindStringSubmatch(name)
   247  	if len(groups) != 3 {
   248  		return FullID{}, fmt.Errorf("invalid state file name format: %q", name)
   249  	}
   250  	id := FullID{
   251  		SandboxID:   groups[2],
   252  		ContainerID: groups[1],
   253  	}
   254  	if err := id.validate(); err != nil {
   255  		return FullID{}, fmt.Errorf("invalid state file name %q: %w", name, err)
   256  	}
   257  	return id, nil
   258  }
   259  
   260  // FullID combines sandbox and container ID to identify a container. Sandbox ID
   261  // is used to allow all containers for a given sandbox to be loaded by matching
   262  // sandbox ID in the file name.
   263  type FullID struct {
   264  	SandboxID   string `json:"sandboxId"`
   265  	ContainerID string `json:"containerId"`
   266  }
   267  
   268  func (f *FullID) String() string {
   269  	return f.SandboxID + "/" + f.ContainerID
   270  }
   271  
   272  func (f *FullID) validate() error {
   273  	if err := validateID(f.SandboxID); err != nil {
   274  		return err
   275  	}
   276  	return validateID(f.ContainerID)
   277  }
   278  
   279  // StateFile handles load from/save to container state safely from multiple
   280  // processes. It uses a lock file to provide synchronization between operations.
   281  //
   282  // The lock file is located at: "${s.RootDir}/${containerd-id}_sand:{sandbox-id}.lock".
   283  // The state file is located at: "${s.RootDir}/${containerd-id}_sand:{sandbox-id}.state".
   284  type StateFile struct {
   285  	// RootDir is the directory containing the container metadata file.
   286  	RootDir string `json:"rootDir"`
   287  
   288  	// ID is the sandbox+container ID.
   289  	ID FullID `json:"id"`
   290  
   291  	//
   292  	// Fields below this line are not saved in the state file and will not
   293  	// be preserved across commands.
   294  	//
   295  
   296  	once  sync.Once    `nojson:"true"`
   297  	flock *flock.Flock `nojson:"true"`
   298  }
   299  
   300  // lock globally locks all locking operations for the container.
   301  func (s *StateFile) lock(tryLock TryLock) error {
   302  	s.once.Do(func() {
   303  		s.flock = flock.New(s.lockPath())
   304  	})
   305  
   306  	if tryLock {
   307  		gotLock, err := s.flock.TryLock()
   308  		if err != nil {
   309  			return fmt.Errorf("acquiring lock on %q: %v", s.flock, err)
   310  		}
   311  		if !gotLock {
   312  			return ErrStateFileLocked
   313  		}
   314  	} else {
   315  		if err := s.flock.Lock(); err != nil {
   316  			return fmt.Errorf("acquiring lock on %q: %v", s.flock, err)
   317  		}
   318  	}
   319  	return nil
   320  }
   321  
   322  // LockForNew acquires the lock and checks if the state file doesn't exist. This
   323  // is done to ensure that more than one creation didn't race to create
   324  // containers with the same ID.
   325  func (s *StateFile) LockForNew() error {
   326  	if err := s.lock(BlockAcquire); err != nil {
   327  		return err
   328  	}
   329  
   330  	// Checks if the container already exists by looking for the metadata file.
   331  	if _, err := os.Stat(s.statePath()); err == nil {
   332  		s.UnlockOrDie()
   333  		return fmt.Errorf("container already exists")
   334  	} else if !os.IsNotExist(err) {
   335  		s.UnlockOrDie()
   336  		return fmt.Errorf("looking for existing container: %v", err)
   337  	}
   338  	return nil
   339  }
   340  
   341  // unlock globally unlocks all locking operations for the container.
   342  func (s *StateFile) unlock() error {
   343  	if !s.flock.Locked() {
   344  		panic("unlock called without lock held")
   345  	}
   346  
   347  	if err := s.flock.Unlock(); err != nil {
   348  		log.Warningf("Error to release lock on %q: %v", s.flock, err)
   349  		return fmt.Errorf("releasing lock on %q: %v", s.flock, err)
   350  	}
   351  	return nil
   352  }
   353  
   354  // UnlockOrDie is the same as unlock() but panics in case of failure.
   355  func (s *StateFile) UnlockOrDie() {
   356  	if !s.flock.Locked() {
   357  		panic("unlock called without lock held")
   358  	}
   359  	if err := s.flock.Unlock(); err != nil {
   360  		panic(fmt.Sprintf("Error releasing lock on %q: %v", s.flock, err))
   361  	}
   362  }
   363  
   364  // SaveLocked saves 'v' to the state file.
   365  //
   366  // Preconditions: lock(*) must been called before.
   367  func (s *StateFile) SaveLocked(v any) error {
   368  	if !s.flock.Locked() {
   369  		panic("saveLocked called without lock held")
   370  	}
   371  
   372  	meta, err := json.Marshal(v)
   373  	if err != nil {
   374  		return err
   375  	}
   376  	if err := ioutil.WriteFile(s.statePath(), meta, 0640); err != nil {
   377  		return fmt.Errorf("writing json file: %v", err)
   378  	}
   379  	return nil
   380  }
   381  
   382  // Stat returns the result of calling stat() on the state file.
   383  // Doing so does not require locking.
   384  func (s *StateFile) Stat() (os.FileInfo, error) {
   385  	return os.Stat(s.statePath())
   386  }
   387  
   388  func (s *StateFile) load(v any, opts LoadOpts) error {
   389  	if err := s.lock(opts.TryLock); err != nil {
   390  		return err
   391  	}
   392  	defer s.UnlockOrDie()
   393  
   394  	metaBytes, err := ioutil.ReadFile(s.statePath())
   395  	if err != nil {
   396  		return err
   397  	}
   398  	return json.Unmarshal(metaBytes, &v)
   399  }
   400  
   401  func (s *StateFile) close() error {
   402  	if s.flock == nil {
   403  		return nil
   404  	}
   405  	if s.flock.Locked() {
   406  		panic("Closing locked file")
   407  	}
   408  	return s.flock.Close()
   409  }
   410  
   411  func buildPath(rootDir string, id FullID, extension string) string {
   412  	// Note: "_" and ":" are not valid in IDs.
   413  	name := fmt.Sprintf("%s_sandbox:%s.%s", id.ContainerID, id.SandboxID, extension)
   414  	return filepath.Join(rootDir, name)
   415  }
   416  
   417  // statePath is the full path to the state file.
   418  func (s *StateFile) statePath() string {
   419  	return buildPath(s.RootDir, s.ID, stateFileExtension)
   420  }
   421  
   422  // lockPath is the full path to the lock file.
   423  func (s *StateFile) lockPath() string {
   424  	return buildPath(s.RootDir, s.ID, "lock")
   425  }
   426  
   427  // Destroy deletes all state created by the stateFile. It may be called with the
   428  // lock file held. In that case, the lock file must still be unlocked and
   429  // properly closed after destroy returns.
   430  func (s *StateFile) Destroy() error {
   431  	if err := os.Remove(s.statePath()); err != nil && !os.IsNotExist(err) {
   432  		return err
   433  	}
   434  	if err := os.Remove(s.lockPath()); err != nil && !os.IsNotExist(err) {
   435  		return err
   436  	}
   437  	return nil
   438  }