github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/containers/containerd/containerd_linux.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  //go:build linux
    16  
    17  // Package containerd extracts container package from containerd metadb database.
    18  package containerd
    19  
    20  import (
    21  	"context"
    22  	"encoding/binary"
    23  	"encoding/json"
    24  	"errors"
    25  	"fmt"
    26  	"os"
    27  	"path/filepath"
    28  	"runtime"
    29  	"strconv"
    30  	"strings"
    31  	"time"
    32  
    33  	"github.com/containerd/containerd/metadata"
    34  	"github.com/containerd/containerd/namespaces"
    35  	"github.com/google/osv-scalibr/extractor"
    36  	"github.com/google/osv-scalibr/extractor/filesystem"
    37  	"github.com/google/osv-scalibr/extractor/filesystem/internal/units"
    38  	"github.com/google/osv-scalibr/inventory"
    39  	"github.com/google/osv-scalibr/log"
    40  	"github.com/google/osv-scalibr/plugin"
    41  	bolt "go.etcd.io/bbolt"
    42  )
    43  
const (
	// Name is the unique name of this extractor.
	Name = "containers/containerd"

	// defaultMaxFileSize is the default size limit (500 MiB) applied to the files this
	// extractor reads. It is the default for Config.MaxMetaDBFileSize and is also the
	// limit applied to a container's CRI status file.
	defaultMaxFileSize = 500 * units.MiB

	// criPluginStatusFilePrefix is the scan-root-relative directory that holds one
	// sub-directory per container with the CRI plugin's "status" file. The status file
	// is read to obtain the init pid of a container.
	criPluginStatusFilePrefix = "var/lib/containerd/io.containerd.grpc.v1.cri/containers/"

	// overlayfsSnapshotsPath is the scan-root-relative directory containing the overlayfs
	// snapshotter's snapshot folders (one folder per snapshot ID).
	overlayfsSnapshotsPath = "var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots"
	// snapshotterMetadataDBPath is the scan-root-relative path of the overlayfs snapshotter's
	// metadata.db file, which is parsed to map snapshot folders to container mount points.
	snapshotterMetadataDBPath = "var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/metadata.db"

	// linuxMetaDBPath is the scan-root-relative path of the meta.db file that holds the
	// container metadata on Linux systems.
	linuxMetaDBPath = "var/lib/containerd/io.containerd.metadata.v1.bolt/meta.db"
	// runhcsStateFilePrefix is the scan-root-relative directory holding the runhcs state
	// files; the shim.pid file found under it provides the pid of a runhcs container.
	runhcsStateFilePrefix = "ProgramData/containerd/state/io.containerd.runtime.v2.task/"
)
    65  
// Config is the configuration for the Extractor.
type Config struct {
	// MaxMetaDBFileSize is the maximum size of a database file the extractor will open.
	// It is compared against the size reported by the file's FileInfo; if Extract gets
	// a bigger file, it returns an error instead of parsing it.
	MaxMetaDBFileSize int64
}
    72  
    73  // DefaultConfig returns the default configuration for the containerd extractor.
    74  func DefaultConfig() Config {
    75  	return Config{
    76  		MaxMetaDBFileSize: defaultMaxFileSize,
    77  	}
    78  }
    79  
// Extractor extracts containers from the containerd metadb file.
type Extractor struct {
	// maxMetaDBFileSize is the size limit for the database files opened by Extract;
	// it is copied from Config.MaxMetaDBFileSize by New.
	maxMetaDBFileSize int64
}
    84  
    85  // New returns a containerd container package extractor.
    86  func New(cfg Config) *Extractor {
    87  	return &Extractor{
    88  		maxMetaDBFileSize: cfg.MaxMetaDBFileSize,
    89  	}
    90  }
    91  
    92  // NewDefault returns an extractor with the default config settings.
    93  func NewDefault() filesystem.Extractor { return New(DefaultConfig()) }
    94  
    95  // Config returns the configuration of the extractor.
    96  func (e Extractor) Config() Config {
    97  	return Config{
    98  		MaxMetaDBFileSize: e.maxMetaDBFileSize,
    99  	}
   100  }
   101  
   102  // Name of the extractor.
   103  func (e Extractor) Name() string { return Name }
   104  
   105  // Version of the extractor.
   106  func (e Extractor) Version() int { return 0 }
   107  
   108  // Requirements of the extractor.
   109  func (e Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{DirectFS: true} }
   110  
   111  // FileRequired returns true if the specified file matches containerd metaDB file pattern.
   112  func (e Extractor) FileRequired(api filesystem.FileAPI) bool {
   113  	path := api.Path()
   114  	// On Windows the metadb file is expected to be located at the
   115  	// <scanRoot>/ProgramData/containerd/root/io.containerd.metadata.v1.bolt/meta.db path.
   116  	switch runtime.GOOS {
   117  	case "windows":
   118  		return path == "ProgramData/containerd/root/io.containerd.metadata.v1.bolt/meta.db"
   119  
   120  	// On Linux the metadb file is expected to be located at the
   121  	// <scanRoot>/var/lib/containerd/io.containerd.metadata.v1.bolt/meta.db path.
   122  	default:
   123  		return path == linuxMetaDBPath
   124  	}
   125  }
   126  
   127  // Extract container package through the containerd metadb file passed as the scan input.
   128  func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) {
   129  	var pkgs = []*extractor.Package{}
   130  
   131  	if input.Info != nil && input.Info.Size() > e.maxMetaDBFileSize {
   132  		return inventory.Inventory{}, fmt.Errorf("containerd metadb file is too large: %d", input.Info.Size())
   133  	}
   134  	// Timeout is added to make sure Scalibr does not hand if the metadb file is open by another process.
   135  	// This will still allow to handle the snapshot of a machine.
   136  	metaDB, err := bolt.Open(filepath.Join(input.Root, input.Path), 0444, &bolt.Options{Timeout: 1 * time.Second})
   137  	if err != nil {
   138  		return inventory.Inventory{}, fmt.Errorf("could not read the containerd metadb file: %w", err)
   139  	}
   140  
   141  	defer metaDB.Close()
   142  
   143  	var snapshotsMetadata []SnapshotMetadata
   144  	// If it's linux, parse the default overlayfs snapshotter metadata.db file.
   145  	if input.Path == linuxMetaDBPath {
   146  		fullMetadataDBPath := filepath.Join(input.Root, snapshotterMetadataDBPath)
   147  		snapshotsMetadata, err = snapshotsMetadataFromDB(fullMetadataDBPath, e.maxMetaDBFileSize, "overlayfs")
   148  		if err != nil {
   149  			return inventory.Inventory{}, fmt.Errorf("could not collect snapshots metadata from DB: %w", err)
   150  		}
   151  	}
   152  
   153  	ctrMetadata, err := containersFromMetaDB(ctx, metaDB, input.Root, snapshotsMetadata)
   154  	if err != nil {
   155  		log.Errorf("Could not get container package from the containerd metadb file: %v", err)
   156  		return inventory.Inventory{}, err
   157  	}
   158  
   159  	for _, ctr := range ctrMetadata {
   160  		pkg := &extractor.Package{
   161  			Name:      ctr.ImageName,
   162  			Version:   ctr.ImageDigest,
   163  			Locations: []string{input.Path},
   164  			Metadata:  &ctr,
   165  		}
   166  		pkgs = append(pkgs, pkg)
   167  	}
   168  	return inventory.Inventory{Packages: pkgs}, nil
   169  }
   170  
   171  // This method checks if the given file is valid to be opened, and make sure it's not oversized.
   172  func fileSizeCheck(filepath string, maxFileSize int64) (err error) {
   173  	fileInfo, err := os.Stat(filepath)
   174  	if err != nil {
   175  		return err
   176  	}
   177  	if fileInfo.Size() > maxFileSize {
   178  		return fmt.Errorf("file %s is too large: %d", filepath, fileInfo.Size())
   179  	}
   180  	return nil
   181  }
   182  
   183  // namespacesFromMetaDB returns the list of namespaces stored in the containerd metaDB file.
   184  func namespacesFromMetaDB(ctx context.Context, metaDB *bolt.DB) ([]string, error) {
   185  	var namespaces []string
   186  
   187  	err := metaDB.View(func(tx *bolt.Tx) error {
   188  		store := metadata.NewNamespaceStore(tx)
   189  		nss, err := store.List(ctx)
   190  		if err != nil {
   191  			return err
   192  		}
   193  		namespaces = nss
   194  		return nil
   195  	})
   196  
   197  	if err != nil {
   198  		return nil, err
   199  	}
   200  
   201  	return namespaces, nil
   202  }
   203  
   204  func containersFromMetaDB(ctx context.Context, metaDB *bolt.DB, scanRoot string, snapshotsMetadata []SnapshotMetadata) ([]Metadata, error) {
   205  	var containersMetadata []Metadata
   206  	// Get list of namespaces from the containerd metadb file.
   207  	nss, err := namespacesFromMetaDB(ctx, metaDB)
   208  	if err != nil {
   209  		return nil, err
   210  	}
   211  	containerdDB := metadata.NewDB(metaDB, nil, nil)
   212  	containerStore := metadata.NewContainerStore(containerdDB)
   213  	imageStore := metadata.NewImageStore(containerdDB)
   214  	for _, ns := range nss {
   215  		// For each namespace stored in the metadb, get the container list to handle.
   216  		ctx := namespaces.WithNamespace(ctx, ns)
   217  		ctrs, err := containerStore.List(ctx)
   218  		if err != nil {
   219  			return nil, err
   220  		}
   221  
   222  		// For each container in the namespace
   223  		// get the init process pid (only running containers will have it stored on the file system)
   224  		// and the image digest.
   225  		for _, ctr := range ctrs {
   226  			var initPID int
   227  			id := ctr.ID
   228  			if initPID = containerInitPid(scanRoot, ctr.Runtime.Name, ns, id); initPID == -1 {
   229  				continue
   230  			}
   231  			img, err := imageStore.Get(ctx, ctr.Image)
   232  			if err != nil {
   233  				log.Errorf("Could not find the image for container %v, error: %v", id, err)
   234  			}
   235  
   236  			var lowerDir, upperDir, workDir string
   237  			// If the filesystem is overlayfs, then parse overlayfs metadata.db
   238  			if ctr.Snapshotter == "overlayfs" {
   239  				lowerDir, upperDir, workDir = collectDirs(scanRoot, snapshotsMetadata, ctr.SnapshotKey)
   240  			}
   241  
   242  			containersMetadata = append(containersMetadata,
   243  				Metadata{Namespace: ns,
   244  					ImageName:    img.Name,
   245  					ImageDigest:  img.Target.Digest.String(),
   246  					Runtime:      ctr.Runtime.Name,
   247  					PodName:      ctr.Labels["io.kubernetes.pod.name"],
   248  					PodNamespace: ctr.Labels["io.kubernetes.pod.namespace"],
   249  					ID:           id,
   250  					PID:          initPID,
   251  					Snapshotter:  ctr.Snapshotter,
   252  					SnapshotKey:  ctr.SnapshotKey,
   253  					LowerDir:     lowerDir,
   254  					UpperDir:     upperDir,
   255  					WorkDir:      workDir})
   256  		}
   257  	}
   258  	return containersMetadata, nil
   259  }
   260  
   261  // Trim the snapshot digest to match the snapshot key in the metadata.db file.
   262  func digestSnapshotInfoMapping(snapshotsMetadata []SnapshotMetadata) map[string]SnapshotMetadata {
   263  	digestSnapshotInfoMapping := make(map[string]SnapshotMetadata)
   264  	for _, snapshotMetadata := range snapshotsMetadata {
   265  		// The snapshotMetadata.Digest is in the format of ".*/<digest>".
   266  		// The snapshotKey in the metadata.db file is the "<digest>" part.
   267  		// If the snapshotMetadata.Digest does not have the "/" or "/" is the last character, then it's
   268  		// not a valid snapshot digest.
   269  		digestSplitterIndex := strings.LastIndex(snapshotMetadata.Digest, "/")
   270  		if digestSplitterIndex == -1 || digestSplitterIndex == len(snapshotMetadata.Digest)-1 {
   271  			continue
   272  		}
   273  		shorterDigest := snapshotMetadata.Digest[digestSplitterIndex+1:]
   274  		digestSnapshotInfoMapping[shorterDigest] = snapshotMetadata
   275  	}
   276  	return digestSnapshotInfoMapping
   277  }
   278  
   279  // Format the lowerDir, upperDir and workDir for the container.
   280  func collectDirs(scanRoot string, snapshotsMetadata []SnapshotMetadata, snapshotKey string) (string, string, string) {
   281  	var lowerDirs []string
   282  	var parentSnapshotIDs []uint64
   283  	parentSnapshotIDs = getParentSnapshotIDByDigest(snapshotsMetadata, snapshotKey, parentSnapshotIDs)
   284  	for _, parentSnapshotID := range parentSnapshotIDs {
   285  		lowerDirs = append(lowerDirs, filepath.Join(scanRoot, overlayfsSnapshotsPath, strconv.FormatUint(parentSnapshotID, 10), "fs"))
   286  	}
   287  	// Sample lowerDir: lowerdir=/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/15/fs:/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/12/fs:/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/8/fs:/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/5/fs
   288  	lowerDir := strings.Join(lowerDirs, ":")
   289  	for _, snapshotMetadata := range snapshotsMetadata {
   290  		if strings.Contains(snapshotMetadata.Digest, snapshotKey) {
   291  			upperDir := filepath.Join(scanRoot, overlayfsSnapshotsPath, strconv.FormatUint(snapshotMetadata.ID, 10), "fs")
   292  			workDir := filepath.Join(scanRoot, overlayfsSnapshotsPath, strconv.FormatUint(snapshotMetadata.ID, 10), "work")
   293  			return lowerDir, upperDir, workDir
   294  		}
   295  	}
   296  	return lowerDir, "", ""
   297  }
   298  
   299  // Collect the parent snapshot ids of the given snapshot.
   300  func getParentSnapshotIDByDigest(snapshotsMetadata []SnapshotMetadata, digest string, parentIDList []uint64) []uint64 {
   301  	snapshotMetadataDict := digestSnapshotInfoMapping(snapshotsMetadata)
   302  	if _, ok := snapshotMetadataDict[digest]; !ok {
   303  		log.Errorf("Could not find the parent snapshot info in the metadata.db file for digest: %v", digest)
   304  		return parentIDList
   305  	}
   306  	parentSnapshotMetadata := snapshotMetadataDict[digest]
   307  	if strings.Contains(digest, "sha256:") {
   308  		// start from its parent snapshots.
   309  		parentIDList = append(parentIDList, parentSnapshotMetadata.ID)
   310  	}
   311  	if parentSnapshotMetadata.Parent == "" {
   312  		return parentIDList
   313  	}
   314  	shorterDigest := parentSnapshotMetadata.Parent[strings.LastIndex(snapshotMetadataDict[digest].Parent, "/")+1:]
   315  	return getParentSnapshotIDByDigest(snapshotsMetadata, shorterDigest, parentIDList)
   316  }
   317  
   318  // Parse the snapshots information from Metadata.db if db file is valid and not too large.
   319  func snapshotsMetadataFromDB(fullMetadataDBPath string, maxMetaDBFileSize int64, fileSystemDriver string) ([]SnapshotMetadata, error) {
   320  	// extracted snapshots metadata from the metadata.db file.
   321  	var snapshotsMetadata []SnapshotMetadata
   322  
   323  	// Check if the file is valid to be opened, and make sure it's not too large.
   324  	err := fileSizeCheck(fullMetadataDBPath, maxMetaDBFileSize)
   325  	if err != nil {
   326  		return nil, fmt.Errorf("could not read the containerd metadb file: %w", err)
   327  	}
   328  
   329  	metadataDB, err := bolt.Open(fullMetadataDBPath, 0444, &bolt.Options{Timeout: 1 * time.Second})
   330  	if err != nil {
   331  		return nil, fmt.Errorf("could not read the containerd metadb file: %w", err)
   332  	}
   333  	defer metadataDB.Close()
   334  	err = metadataDB.View(func(tx *bolt.Tx) error {
   335  		snapshotsBucketByDigest, err := snapshotsBucketByDigest(tx)
   336  		if err != nil {
   337  			return fmt.Errorf("not able to grab the names of the snapshot buckets: %w", err)
   338  		}
   339  		// Store the important info of the snapshots into snapshotMetadata struct.
   340  		snapshotsMetadata = snapshotMetadataFromSnapshotsBuckets(tx, snapshotsBucketByDigest, snapshotsMetadata, fileSystemDriver)
   341  		return nil
   342  	})
   343  	if err != nil {
   344  		log.Errorf("Not able to view the db: %v", err)
   345  		return nil, err
   346  	}
   347  	return snapshotsMetadata, nil
   348  }
   349  
   350  // List the names of the snapshot buckets that are stored in the metadata.db file.
   351  func snapshotsBucketByDigest(tx *bolt.Tx) ([]string, error) {
   352  	// List of bucket names.These buckets stores snapshots information. Normally its name
   353  	// is the digest.
   354  	var snapshotsBucketByDigest []string
   355  	//  metadata db structure: v1-> snapshots -> <snapshot_digest> -> <snapshot_info_fields>
   356  	if tx == nil {
   357  		return snapshotsBucketByDigest, errors.New("the transaction is nil")
   358  	}
   359  	if tx.Bucket([]byte("v1")) == nil {
   360  		return snapshotsBucketByDigest, errors.New("could not find the v1 bucket in the metadata.db file")
   361  	}
   362  	if tx.Bucket([]byte("v1")).Bucket([]byte("snapshots")) == nil {
   363  		return snapshotsBucketByDigest, errors.New("could not find the snapshots bucket in the metadata.db file")
   364  	}
   365  	snapshotsMetadataBucket := tx.Bucket([]byte("v1")).Bucket([]byte("snapshots"))
   366  	err := snapshotsMetadataBucket.ForEach(func(k []byte, v []byte) error {
   367  		// When the value is nil, it means it's a bucket. In this case, we would like to grab the
   368  		// bucket name and visit it later.
   369  		if v == nil {
   370  			snapshotsBucketByDigest = append(snapshotsBucketByDigest, string(k))
   371  		}
   372  		return nil
   373  	})
   374  	return snapshotsBucketByDigest, err
   375  }
   376  
   377  func snapshotMetadataFromSnapshotsBuckets(tx *bolt.Tx, snapshotsBucketByDigest []string, snapshotsMetadata []SnapshotMetadata, fileSystemDriver string) []SnapshotMetadata {
   378  	for _, shaDigest := range snapshotsBucketByDigest {
   379  		if tx == nil {
   380  			return snapshotsMetadata
   381  		}
   382  		if tx.Bucket([]byte("v1")) == nil {
   383  			return snapshotsMetadata
   384  		}
   385  		if tx.Bucket([]byte("v1")).Bucket([]byte("snapshots")) == nil {
   386  			return snapshotsMetadata
   387  		}
   388  		if tx.Bucket([]byte("v1")).Bucket([]byte("snapshots")).Bucket([]byte(shaDigest)) == nil {
   389  			return snapshotsMetadata
   390  		}
   391  		// Get the bucket by digest.
   392  		snapshotMetadataBucket := tx.Bucket([]byte("v1")).Bucket([]byte("snapshots")).Bucket([]byte(shaDigest))
   393  		// This id is the corresponding folder name in overlayfs/snapshots folder.
   394  		id := uint64(0)
   395  		idByte := snapshotMetadataBucket.Get([]byte("id"))
   396  		if idByte != nil {
   397  			id, _ = binary.Uvarint(idByte)
   398  		}
   399  		// The status of the snapshot.
   400  		kind := -1
   401  		kindByte := snapshotMetadataBucket.Get([]byte("kind"))
   402  		if kindByte != nil {
   403  			kind = int(kindByte[0])
   404  		}
   405  		// The parent snapshot of the snapshot.
   406  		parent := ""
   407  		parentByte := snapshotMetadataBucket.Get([]byte("parent"))
   408  		if parentByte != nil {
   409  			parent = string(parentByte)
   410  		}
   411  
   412  		snapshotsMetadata = append(snapshotsMetadata, SnapshotMetadata{Digest: shaDigest, ID: id, Kind: kind, Parent: parent, FilesystemType: fileSystemDriver})
   413  	}
   414  	return snapshotsMetadata
   415  }
   416  
   417  func containerInitPid(scanRoot string, runtimeName string, namespace string, id string) int {
   418  	// A typical Linux case.
   419  	if runtimeName == "io.containerd.runc.v2" {
   420  		return runcInitPid(scanRoot, id)
   421  	}
   422  
   423  	// A typical Windows case.
   424  	if runtimeName == "io.containerd.runhcs.v1" {
   425  		return runhcsInitPid(scanRoot, namespace, id)
   426  	}
   427  
   428  	return -1
   429  }
   430  
   431  func runcInitPid(scanRoot string, id string) int {
   432  	// If a container is running by runc, the init pid is stored in the grpc status file.
   433  	// status file is located at the
   434  	// <scanRoot>/<criPluginStatusFilePrefix>/<container_id>/status path.
   435  	statusPath := filepath.Join(scanRoot, criPluginStatusFilePrefix, id, "status")
   436  	if _, err := os.Stat(statusPath); err != nil {
   437  		log.Info("File status does not exists for container %v, error: %v", id, err)
   438  		return -1
   439  	}
   440  
   441  	err := fileSizeCheck(statusPath, defaultMaxFileSize)
   442  	if err != nil {
   443  		return -1
   444  	}
   445  
   446  	initPID := -1
   447  
   448  	statusContent, err := os.ReadFile(statusPath)
   449  	if err != nil {
   450  		log.Errorf("Could not read for %s status for container: %v", id, err)
   451  		return -1
   452  	}
   453  	var grpcContainerStatus map[string]*json.RawMessage
   454  	if err := json.Unmarshal(statusContent, &grpcContainerStatus); err != nil {
   455  		log.Errorf("Can't unmarshal status for container %v , error: %v", id, err)
   456  		return -1
   457  	}
   458  
   459  	if _, ok := grpcContainerStatus["Pid"]; !ok {
   460  		log.Errorf("Can't find field pid filed in status for container %v", id)
   461  		return -1
   462  	}
   463  	if err := json.Unmarshal(*grpcContainerStatus["Pid"], &initPID); err != nil {
   464  		log.Errorf("Can't unmarshal pid in status for container %v, error: %v", id, err)
   465  		return -1
   466  	}
   467  
   468  	return initPID
   469  }
   470  
   471  func runhcsInitPid(scanRoot string, namespace string, id string) int {
   472  	// If a container is running by runhcs, the init pid is stored in the runhcs shim.pid file.
   473  	// shim.pid file is located at the
   474  	// <scanRoot>/<runhcsStateFilePrefix>/<namespace_name>/<container_id>/shim.pid.
   475  	shimPIDPath := filepath.Join(scanRoot, runhcsStateFilePrefix, namespace, id, "shim.pid")
   476  	if _, err := os.Stat(shimPIDPath); err != nil {
   477  		log.Info("File shim.pid does not exists for container %v, error: %v", id, err)
   478  		return -1
   479  	}
   480  
   481  	shimPIDContent, err := os.ReadFile(shimPIDPath)
   482  	if err != nil {
   483  		log.Errorf("Could not read for %s shim.pid for container: %v", id, err)
   484  		return -1
   485  	}
   486  	shimPidStr := strings.TrimSpace(string(shimPIDContent))
   487  	initPID, err := strconv.Atoi(shimPidStr)
   488  	if err != nil {
   489  		log.Errorf("Can't convert shim.pid content to int for container %v, error: %v", id, err)
   490  		return -1
   491  	}
   492  	return initPID
   493  }