github.com/containerd/containerd@v22.0.0-20200918172823-438c87b8e050+incompatible/snapshots/devmapper/snapshotter.go (about)

     1  // +build linux
     2  
     3  /*
     4     Copyright The containerd Authors.
     5  
     6     Licensed under the Apache License, Version 2.0 (the "License");
     7     you may not use this file except in compliance with the License.
     8     You may obtain a copy of the License at
     9  
    10         http://www.apache.org/licenses/LICENSE-2.0
    11  
    12     Unless required by applicable law or agreed to in writing, software
    13     distributed under the License is distributed on an "AS IS" BASIS,
    14     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15     See the License for the specific language governing permissions and
    16     limitations under the License.
    17  */
    18  
    19  package devmapper
    20  
    21  import (
    22  	"context"
    23  	"fmt"
    24  	"os"
    25  	"os/exec"
    26  	"path/filepath"
    27  	"strings"
    28  	"sync"
    29  
    30  	"github.com/containerd/containerd/errdefs"
    31  	"github.com/containerd/containerd/log"
    32  	"github.com/containerd/containerd/mount"
    33  	"github.com/containerd/containerd/snapshots"
    34  	"github.com/containerd/containerd/snapshots/devmapper/dmsetup"
    35  	"github.com/containerd/containerd/snapshots/storage"
    36  	"github.com/hashicorp/go-multierror"
    37  	"github.com/pkg/errors"
    38  	"github.com/sirupsen/logrus"
    39  )
    40  
const (
	// metadataFileName is the name of the metadata store file; NewSnapshotter
	// joins it with the configured root path.
	metadataFileName = "metadata.db"
	// fsTypeExt4 is the filesystem type reported in the mounts built for
	// snapshot devices.
	fsTypeExt4 = "ext4"
)
    45  
// closeFunc is a cleanup callback; the snapshotter collects these in
// Snapshotter.cleanupFn and invokes them from Close.
type closeFunc func() error
    47  
// Snapshotter implements containerd's snapshotter (https://godoc.org/github.com/containerd/containerd/snapshots#Snapshotter)
// based on Linux device-mapper targets.
type Snapshotter struct {
	store     *storage.MetaStore // snapshot metadata store; provides the transactions used by withTransaction
	pool      *PoolDevice        // pool device used to create, query, suspend and remove snapshot devices
	config    *Config            // configuration passed to NewSnapshotter
	cleanupFn []closeFunc        // callbacks run, in order, by Close
	closeOnce sync.Once          // guards cleanupFn so repeated Close calls are no-ops
}
    57  
    58  // NewSnapshotter creates new device mapper snapshotter.
    59  // Internally it creates thin-pool device (or reloads if it's already exists) and
    60  // initializes a database file for metadata.
    61  func NewSnapshotter(ctx context.Context, config *Config) (*Snapshotter, error) {
    62  	// Make sure snapshotter configuration valid before running
    63  	if err := config.parse(); err != nil {
    64  		return nil, err
    65  	}
    66  
    67  	if err := config.Validate(); err != nil {
    68  		return nil, err
    69  	}
    70  
    71  	var cleanupFn []closeFunc
    72  
    73  	if err := os.MkdirAll(config.RootPath, 0750); err != nil && !os.IsExist(err) {
    74  		return nil, errors.Wrapf(err, "failed to create root directory: %s", config.RootPath)
    75  	}
    76  
    77  	store, err := storage.NewMetaStore(filepath.Join(config.RootPath, metadataFileName))
    78  	if err != nil {
    79  		return nil, errors.Wrap(err, "failed to create metastore")
    80  	}
    81  
    82  	cleanupFn = append(cleanupFn, store.Close)
    83  
    84  	poolDevice, err := NewPoolDevice(ctx, config)
    85  	if err != nil {
    86  		return nil, err
    87  	}
    88  
    89  	cleanupFn = append(cleanupFn, poolDevice.Close)
    90  
    91  	return &Snapshotter{
    92  		store:     store,
    93  		config:    config,
    94  		pool:      poolDevice,
    95  		cleanupFn: cleanupFn,
    96  	}, nil
    97  }
    98  
    99  // Stat returns the info for an active or committed snapshot from store
   100  func (s *Snapshotter) Stat(ctx context.Context, key string) (snapshots.Info, error) {
   101  	log.G(ctx).WithField("key", key).Debug("stat")
   102  
   103  	var (
   104  		info snapshots.Info
   105  		err  error
   106  	)
   107  
   108  	err = s.withTransaction(ctx, false, func(ctx context.Context) error {
   109  		_, info, _, err = storage.GetInfo(ctx, key)
   110  		return err
   111  	})
   112  
   113  	return info, err
   114  }
   115  
   116  // Update updates an existing snapshot info's data
   117  func (s *Snapshotter) Update(ctx context.Context, info snapshots.Info, fieldpaths ...string) (snapshots.Info, error) {
   118  	log.G(ctx).Debugf("update: %s", strings.Join(fieldpaths, ", "))
   119  
   120  	var err error
   121  	err = s.withTransaction(ctx, true, func(ctx context.Context) error {
   122  		info, err = storage.UpdateInfo(ctx, info, fieldpaths...)
   123  		return err
   124  	})
   125  
   126  	return info, err
   127  }
   128  
   129  // Usage returns the resource usage of an active or committed snapshot excluding the usage of parent snapshots.
   130  func (s *Snapshotter) Usage(ctx context.Context, key string) (snapshots.Usage, error) {
   131  	log.G(ctx).WithField("key", key).Debug("usage")
   132  
   133  	var (
   134  		id    string
   135  		err   error
   136  		info  snapshots.Info
   137  		usage snapshots.Usage
   138  	)
   139  
   140  	err = s.withTransaction(ctx, false, func(ctx context.Context) error {
   141  		id, info, usage, err = storage.GetInfo(ctx, key)
   142  		if err != nil {
   143  			return err
   144  		}
   145  
   146  		if info.Kind == snapshots.KindActive {
   147  			deviceName := s.getDeviceName(id)
   148  			usage.Size, err = s.pool.GetUsage(deviceName)
   149  			if err != nil {
   150  				return err
   151  			}
   152  		}
   153  
   154  		if info.Parent != "" {
   155  			// GetInfo returns total number of bytes used by a snapshot (including parent).
   156  			// So subtract parent usage in order to get delta consumed by layer itself.
   157  			_, _, parentUsage, err := storage.GetInfo(ctx, info.Parent)
   158  			if err != nil {
   159  				return err
   160  			}
   161  
   162  			usage.Size -= parentUsage.Size
   163  		}
   164  
   165  		return err
   166  	})
   167  
   168  	return usage, err
   169  }
   170  
   171  // Mounts return the list of mounts for the active or view snapshot
   172  func (s *Snapshotter) Mounts(ctx context.Context, key string) ([]mount.Mount, error) {
   173  	log.G(ctx).WithField("key", key).Debug("mounts")
   174  
   175  	var (
   176  		snap storage.Snapshot
   177  		err  error
   178  	)
   179  
   180  	err = s.withTransaction(ctx, false, func(ctx context.Context) error {
   181  		snap, err = storage.GetSnapshot(ctx, key)
   182  		return err
   183  	})
   184  
   185  	return s.buildMounts(snap), nil
   186  }
   187  
   188  // Prepare creates thin device for an active snapshot identified by key
   189  func (s *Snapshotter) Prepare(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) {
   190  	log.G(ctx).WithFields(logrus.Fields{"key": key, "parent": parent}).Debug("prepare")
   191  
   192  	var (
   193  		mounts []mount.Mount
   194  		err    error
   195  	)
   196  
   197  	err = s.withTransaction(ctx, true, func(ctx context.Context) error {
   198  		mounts, err = s.createSnapshot(ctx, snapshots.KindActive, key, parent, opts...)
   199  		return err
   200  	})
   201  
   202  	return mounts, err
   203  }
   204  
   205  // View creates readonly thin device for the given snapshot key
   206  func (s *Snapshotter) View(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) {
   207  	log.G(ctx).WithFields(logrus.Fields{"key": key, "parent": parent}).Debug("prepare")
   208  
   209  	var (
   210  		mounts []mount.Mount
   211  		err    error
   212  	)
   213  
   214  	err = s.withTransaction(ctx, true, func(ctx context.Context) error {
   215  		mounts, err = s.createSnapshot(ctx, snapshots.KindView, key, parent, opts...)
   216  		return err
   217  	})
   218  
   219  	return mounts, err
   220  }
   221  
   222  // Commit marks an active snapshot as committed in meta store.
   223  // Block device unmount operation captures snapshot changes by itself, so no
   224  // additional actions needed within Commit operation.
   225  func (s *Snapshotter) Commit(ctx context.Context, name, key string, opts ...snapshots.Opt) error {
   226  	log.G(ctx).WithFields(logrus.Fields{"name": name, "key": key}).Debug("commit")
   227  
   228  	return s.withTransaction(ctx, true, func(ctx context.Context) error {
   229  		id, _, _, err := storage.GetInfo(ctx, key)
   230  		if err != nil {
   231  			return err
   232  		}
   233  
   234  		deviceName := s.getDeviceName(id)
   235  		size, err := s.pool.GetUsage(deviceName)
   236  		if err != nil {
   237  			return err
   238  		}
   239  
   240  		usage := snapshots.Usage{
   241  			Size: size,
   242  		}
   243  
   244  		_, err = storage.CommitActive(ctx, key, name, usage, opts...)
   245  		if err != nil {
   246  			return err
   247  		}
   248  
   249  		// After committed, the snapshot device will not be directly
   250  		// used anymore. We'd better deativate it to make it *invisible*
   251  		// in userspace, so that tools like LVM2 and fdisk cannot touch it,
   252  		// and avoid useless IOs on it.
   253  		//
   254  		// Before deactivation, we need to flush the outstanding IO by suspend.
   255  		// Afterward, we resume it again to prevent a race window which may cause
   256  		// a process IO hang. See the issue below for details:
   257  		//   (https://github.com/containerd/containerd/issues/4234)
   258  		err = s.pool.SuspendDevice(ctx, deviceName)
   259  		if err != nil {
   260  			return err
   261  		}
   262  
   263  		err = s.pool.ResumeDevice(ctx, deviceName)
   264  		if err != nil {
   265  			return err
   266  		}
   267  
   268  		return s.pool.DeactivateDevice(ctx, deviceName, false, false)
   269  	})
   270  }
   271  
   272  // Remove removes thin device and snapshot metadata by key
   273  func (s *Snapshotter) Remove(ctx context.Context, key string) error {
   274  	log.G(ctx).WithField("key", key).Debug("remove")
   275  
   276  	return s.withTransaction(ctx, true, func(ctx context.Context) error {
   277  		return s.removeDevice(ctx, key)
   278  	})
   279  }
   280  
   281  func (s *Snapshotter) removeDevice(ctx context.Context, key string) error {
   282  	snapID, _, err := storage.Remove(ctx, key)
   283  	if err != nil {
   284  		return err
   285  	}
   286  
   287  	deviceName := s.getDeviceName(snapID)
   288  	if !s.config.AsyncRemove {
   289  		if err := s.pool.RemoveDevice(ctx, deviceName); err != nil {
   290  			log.G(ctx).WithError(err).Errorf("failed to remove device")
   291  			// Tell snapshot GC continue to collect other snapshots.
   292  			// Otherwise, one snapshot collection failure will stop
   293  			// the GC, and all snapshots won't be collected even though
   294  			// having no relationship with the failed one.
   295  			return errdefs.ErrFailedPrecondition
   296  		}
   297  	} else {
   298  		// The asynchronous cleanup will do the real device remove work.
   299  		log.G(ctx).WithField("device", deviceName).Debug("async remove")
   300  		if err := s.pool.MarkDeviceState(ctx, deviceName, Removed); err != nil {
   301  			log.G(ctx).WithError(err).Errorf("failed to mark device as removed")
   302  			return err
   303  		}
   304  	}
   305  
   306  	return nil
   307  }
   308  
   309  // Walk iterates through all metadata Info for the stored snapshots and calls the provided function for each.
   310  func (s *Snapshotter) Walk(ctx context.Context, fn snapshots.WalkFunc, fs ...string) error {
   311  	log.G(ctx).Debug("walk")
   312  	return s.withTransaction(ctx, false, func(ctx context.Context) error {
   313  		return storage.WalkInfo(ctx, fn, fs...)
   314  	})
   315  }
   316  
   317  // ResetPool deactivates and deletes all thin devices in thin-pool.
   318  // Used for cleaning pool after benchmarking.
   319  func (s *Snapshotter) ResetPool(ctx context.Context) error {
   320  	names, err := s.pool.metadata.GetDeviceNames(ctx)
   321  	if err != nil {
   322  		return err
   323  	}
   324  
   325  	var result *multierror.Error
   326  	for _, name := range names {
   327  		if err := s.pool.RemoveDevice(ctx, name); err != nil {
   328  			result = multierror.Append(result, err)
   329  		}
   330  	}
   331  
   332  	return result.ErrorOrNil()
   333  }
   334  
   335  // Close releases devmapper snapshotter resources.
   336  // All subsequent Close calls will be ignored.
   337  func (s *Snapshotter) Close() error {
   338  	log.L.Debug("close")
   339  
   340  	var result *multierror.Error
   341  	s.closeOnce.Do(func() {
   342  		for _, fn := range s.cleanupFn {
   343  			if err := fn(); err != nil {
   344  				result = multierror.Append(result, err)
   345  			}
   346  		}
   347  	})
   348  
   349  	return result.ErrorOrNil()
   350  }
   351  
   352  func (s *Snapshotter) createSnapshot(ctx context.Context, kind snapshots.Kind, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) {
   353  	snap, err := storage.CreateSnapshot(ctx, kind, key, parent, opts...)
   354  	if err != nil {
   355  		return nil, err
   356  	}
   357  
   358  	if len(snap.ParentIDs) == 0 {
   359  		deviceName := s.getDeviceName(snap.ID)
   360  		log.G(ctx).Debugf("creating new thin device '%s'", deviceName)
   361  
   362  		err := s.pool.CreateThinDevice(ctx, deviceName, s.config.BaseImageSizeBytes)
   363  		if err != nil {
   364  			log.G(ctx).WithError(err).Errorf("failed to create thin device for snapshot %s", snap.ID)
   365  			return nil, err
   366  		}
   367  
   368  		if err := s.mkfs(ctx, deviceName); err != nil {
   369  			// Rollback thin device creation if mkfs failed
   370  			return nil, multierror.Append(err,
   371  				s.pool.RemoveDevice(ctx, deviceName))
   372  		}
   373  	} else {
   374  		parentDeviceName := s.getDeviceName(snap.ParentIDs[0])
   375  		snapDeviceName := s.getDeviceName(snap.ID)
   376  		log.G(ctx).Debugf("creating snapshot device '%s' from '%s'", snapDeviceName, parentDeviceName)
   377  
   378  		err := s.pool.CreateSnapshotDevice(ctx, parentDeviceName, snapDeviceName, s.config.BaseImageSizeBytes)
   379  		if err != nil {
   380  			log.G(ctx).WithError(err).Errorf("failed to create snapshot device from parent %s", parentDeviceName)
   381  			return nil, err
   382  		}
   383  	}
   384  
   385  	mounts := s.buildMounts(snap)
   386  
   387  	// Remove default directories not expected by the container image
   388  	_ = mount.WithTempMount(ctx, mounts, func(root string) error {
   389  		return os.Remove(filepath.Join(root, "lost+found"))
   390  	})
   391  
   392  	return mounts, nil
   393  }
   394  
   395  // mkfs creates ext4 filesystem on the given devmapper device
   396  func (s *Snapshotter) mkfs(ctx context.Context, deviceName string) error {
   397  	args := []string{
   398  		"-E",
   399  		// We don't want any zeroing in advance when running mkfs on thin devices (see "man mkfs.ext4")
   400  		"nodiscard,lazy_itable_init=0,lazy_journal_init=0",
   401  		dmsetup.GetFullDevicePath(deviceName),
   402  	}
   403  
   404  	log.G(ctx).Debugf("mkfs.ext4 %s", strings.Join(args, " "))
   405  	output, err := exec.Command("mkfs.ext4", args...).CombinedOutput()
   406  	if err != nil {
   407  		log.G(ctx).WithError(err).Errorf("failed to write fs:\n%s", string(output))
   408  		return err
   409  	}
   410  
   411  	log.G(ctx).Debugf("mkfs:\n%s", string(output))
   412  	return nil
   413  }
   414  
   415  func (s *Snapshotter) getDeviceName(snapID string) string {
   416  	// Add pool name as prefix to avoid collisions with devices from other pools
   417  	return fmt.Sprintf("%s-snap-%s", s.config.PoolName, snapID)
   418  }
   419  
   420  func (s *Snapshotter) getDevicePath(snap storage.Snapshot) string {
   421  	name := s.getDeviceName(snap.ID)
   422  	return dmsetup.GetFullDevicePath(name)
   423  }
   424  
   425  func (s *Snapshotter) buildMounts(snap storage.Snapshot) []mount.Mount {
   426  	var options []string
   427  
   428  	if snap.Kind != snapshots.KindActive {
   429  		options = append(options, "ro")
   430  	}
   431  
   432  	mounts := []mount.Mount{
   433  		{
   434  			Source:  s.getDevicePath(snap),
   435  			Type:    fsTypeExt4,
   436  			Options: options,
   437  		},
   438  	}
   439  
   440  	return mounts
   441  }
   442  
   443  // withTransaction wraps fn callback with containerd's meta store transaction.
   444  // If callback returns an error or transaction is not writable, database transaction will be discarded.
   445  func (s *Snapshotter) withTransaction(ctx context.Context, writable bool, fn func(ctx context.Context) error) error {
   446  	ctx, trans, err := s.store.TransactionContext(ctx, writable)
   447  	if err != nil {
   448  		return err
   449  	}
   450  
   451  	var result *multierror.Error
   452  
   453  	err = fn(ctx)
   454  	if err != nil {
   455  		result = multierror.Append(result, err)
   456  	}
   457  
   458  	// Always rollback if transaction is not writable
   459  	if err != nil || !writable {
   460  		if terr := trans.Rollback(); terr != nil {
   461  			log.G(ctx).WithError(terr).Error("failed to rollback transaction")
   462  			result = multierror.Append(result, errors.Wrap(terr, "rollback failed"))
   463  		}
   464  	} else {
   465  		if terr := trans.Commit(); terr != nil {
   466  			log.G(ctx).WithError(terr).Error("failed to commit transaction")
   467  			result = multierror.Append(result, errors.Wrap(terr, "commit failed"))
   468  		}
   469  	}
   470  
   471  	if err := result.ErrorOrNil(); err != nil {
   472  		log.G(ctx).WithError(err).Debug("snapshotter error")
   473  
   474  		// Unwrap if just one error
   475  		if len(result.Errors) == 1 {
   476  			return result.Errors[0]
   477  		}
   478  
   479  		return err
   480  	}
   481  
   482  	return nil
   483  }
   484  
   485  func (s *Snapshotter) Cleanup(ctx context.Context) error {
   486  	var removedDevices []*DeviceInfo
   487  
   488  	if !s.config.AsyncRemove {
   489  		return nil
   490  	}
   491  
   492  	if err := s.pool.WalkDevices(ctx, func(info *DeviceInfo) error {
   493  		if info.State == Removed {
   494  			removedDevices = append(removedDevices, info)
   495  		}
   496  		return nil
   497  	}); err != nil {
   498  		log.G(ctx).WithError(err).Errorf("failed to query devices from metastore")
   499  		return err
   500  	}
   501  
   502  	var result *multierror.Error
   503  	for _, dev := range removedDevices {
   504  		log.G(ctx).WithField("device", dev.Name).Debug("cleanup device")
   505  		if err := s.pool.RemoveDevice(ctx, dev.Name); err != nil {
   506  			log.G(ctx).WithField("device", dev.Name).Error("failed to cleanup device")
   507  			result = multierror.Append(result, err)
   508  		} else {
   509  			log.G(ctx).WithField("device", dev.Name).Debug("cleanuped device")
   510  		}
   511  	}
   512  
   513  	return result.ErrorOrNil()
   514  }