github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/containers/containerd/containerd_linux.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 //go:build linux 16 17 // Package containerd extracts container package from containerd metadb database. 18 package containerd 19 20 import ( 21 "context" 22 "encoding/binary" 23 "encoding/json" 24 "errors" 25 "fmt" 26 "os" 27 "path/filepath" 28 "runtime" 29 "strconv" 30 "strings" 31 "time" 32 33 "github.com/containerd/containerd/metadata" 34 "github.com/containerd/containerd/namespaces" 35 "github.com/google/osv-scalibr/extractor" 36 "github.com/google/osv-scalibr/extractor/filesystem" 37 "github.com/google/osv-scalibr/extractor/filesystem/internal/units" 38 "github.com/google/osv-scalibr/inventory" 39 "github.com/google/osv-scalibr/log" 40 "github.com/google/osv-scalibr/plugin" 41 bolt "go.etcd.io/bbolt" 42 ) 43 44 const ( 45 // Name is the unique name of this extractor. 46 Name = "containers/containerd" 47 48 // defaultMaxFileSize is the maximum file size. 49 // If Extract gets a bigger file, it will return an error. 50 defaultMaxFileSize = 500 * units.MiB 51 52 // Prefix of the path for container's grpc container status file, used to collect pid for a container. 53 criPluginStatusFilePrefix = "var/lib/containerd/io.containerd.grpc.v1.cri/containers/" 54 55 // Prefix of the path for snapshotter overlayfs snapshots folders. 56 overlayfsSnapshotsPath = "var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots" 57 // The path for the metadata.db file which will be used to parse the mapping between folders and container's mount points. 58 snapshotterMetadataDBPath = "var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/metadata.db" 59 60 // The path for the meta.db file which will be used to parse container metadata on Linux systems. 61 linuxMetaDBPath = "var/lib/containerd/io.containerd.metadata.v1.bolt/meta.db" 62 // Prefix of the path for runhcs state files, used to check if a container is running by runhcs. 63 runhcsStateFilePrefix = "ProgramData/containerd/state/io.containerd.runtime.v2.task/" 64 ) 65 66 // Config is the configuration for the Extractor. 67 type Config struct { 68 // MaxMetaDBFileSize is the maximum file size an extractor will unmarshal. 69 // If Extract gets a bigger file, it will return an error. 70 MaxMetaDBFileSize int64 71 } 72 73 // DefaultConfig returns the default configuration for the containerd extractor. 74 func DefaultConfig() Config { 75 return Config{ 76 MaxMetaDBFileSize: defaultMaxFileSize, 77 } 78 } 79 80 // Extractor extracts containers from the containerd metadb file. 81 type Extractor struct { 82 maxMetaDBFileSize int64 83 } 84 85 // New returns a containerd container package extractor. 86 func New(cfg Config) *Extractor { 87 return &Extractor{ 88 maxMetaDBFileSize: cfg.MaxMetaDBFileSize, 89 } 90 } 91 92 // NewDefault returns an extractor with the default config settings. 93 func NewDefault() filesystem.Extractor { return New(DefaultConfig()) } 94 95 // Config returns the configuration of the extractor. 96 func (e Extractor) Config() Config { 97 return Config{ 98 MaxMetaDBFileSize: e.maxMetaDBFileSize, 99 } 100 } 101 102 // Name of the extractor. 103 func (e Extractor) Name() string { return Name } 104 105 // Version of the extractor. 106 func (e Extractor) Version() int { return 0 } 107 108 // Requirements of the extractor. 109 func (e Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{DirectFS: true} } 110 111 // FileRequired returns true if the specified file matches containerd metaDB file pattern. 112 func (e Extractor) FileRequired(api filesystem.FileAPI) bool { 113 path := api.Path() 114 // On Windows the metadb file is expected to be located at the 115 // <scanRoot>/ProgramData/containerd/root/io.containerd.metadata.v1.bolt/meta.db path. 116 switch runtime.GOOS { 117 case "windows": 118 return path == "ProgramData/containerd/root/io.containerd.metadata.v1.bolt/meta.db" 119 120 // On Linux the metadb file is expected to be located at the 121 // <scanRoot>/var/lib/containerd/io.containerd.metadata.v1.bolt/meta.db path. 122 default: 123 return path == linuxMetaDBPath 124 } 125 } 126 127 // Extract container package through the containerd metadb file passed as the scan input. 128 func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) { 129 var pkgs = []*extractor.Package{} 130 131 if input.Info != nil && input.Info.Size() > e.maxMetaDBFileSize { 132 return inventory.Inventory{}, fmt.Errorf("containerd metadb file is too large: %d", input.Info.Size()) 133 } 134 // Timeout is added to make sure Scalibr does not hand if the metadb file is open by another process. 135 // This will still allow to handle the snapshot of a machine. 136 metaDB, err := bolt.Open(filepath.Join(input.Root, input.Path), 0444, &bolt.Options{Timeout: 1 * time.Second}) 137 if err != nil { 138 return inventory.Inventory{}, fmt.Errorf("could not read the containerd metadb file: %w", err) 139 } 140 141 defer metaDB.Close() 142 143 var snapshotsMetadata []SnapshotMetadata 144 // If it's linux, parse the default overlayfs snapshotter metadata.db file. 145 if input.Path == linuxMetaDBPath { 146 fullMetadataDBPath := filepath.Join(input.Root, snapshotterMetadataDBPath) 147 snapshotsMetadata, err = snapshotsMetadataFromDB(fullMetadataDBPath, e.maxMetaDBFileSize, "overlayfs") 148 if err != nil { 149 return inventory.Inventory{}, fmt.Errorf("could not collect snapshots metadata from DB: %w", err) 150 } 151 } 152 153 ctrMetadata, err := containersFromMetaDB(ctx, metaDB, input.Root, snapshotsMetadata) 154 if err != nil { 155 log.Errorf("Could not get container package from the containerd metadb file: %v", err) 156 return inventory.Inventory{}, err 157 } 158 159 for _, ctr := range ctrMetadata { 160 pkg := &extractor.Package{ 161 Name: ctr.ImageName, 162 Version: ctr.ImageDigest, 163 Locations: []string{input.Path}, 164 Metadata: &ctr, 165 } 166 pkgs = append(pkgs, pkg) 167 } 168 return inventory.Inventory{Packages: pkgs}, nil 169 } 170 171 // This method checks if the given file is valid to be opened, and make sure it's not oversized. 172 func fileSizeCheck(filepath string, maxFileSize int64) (err error) { 173 fileInfo, err := os.Stat(filepath) 174 if err != nil { 175 return err 176 } 177 if fileInfo.Size() > maxFileSize { 178 return fmt.Errorf("file %s is too large: %d", filepath, fileInfo.Size()) 179 } 180 return nil 181 } 182 183 // namespacesFromMetaDB returns the list of namespaces stored in the containerd metaDB file. 184 func namespacesFromMetaDB(ctx context.Context, metaDB *bolt.DB) ([]string, error) { 185 var namespaces []string 186 187 err := metaDB.View(func(tx *bolt.Tx) error { 188 store := metadata.NewNamespaceStore(tx) 189 nss, err := store.List(ctx) 190 if err != nil { 191 return err 192 } 193 namespaces = nss 194 return nil 195 }) 196 197 if err != nil { 198 return nil, err 199 } 200 201 return namespaces, nil 202 } 203 204 func containersFromMetaDB(ctx context.Context, metaDB *bolt.DB, scanRoot string, snapshotsMetadata []SnapshotMetadata) ([]Metadata, error) { 205 var containersMetadata []Metadata 206 // Get list of namespaces from the containerd metadb file. 207 nss, err := namespacesFromMetaDB(ctx, metaDB) 208 if err != nil { 209 return nil, err 210 } 211 containerdDB := metadata.NewDB(metaDB, nil, nil) 212 containerStore := metadata.NewContainerStore(containerdDB) 213 imageStore := metadata.NewImageStore(containerdDB) 214 for _, ns := range nss { 215 // For each namespace stored in the metadb, get the container list to handle. 216 ctx := namespaces.WithNamespace(ctx, ns) 217 ctrs, err := containerStore.List(ctx) 218 if err != nil { 219 return nil, err 220 } 221 222 // For each container in the namespace 223 // get the init process pid (only running containers will have it stored on the file system) 224 // and the image digest. 225 for _, ctr := range ctrs { 226 var initPID int 227 id := ctr.ID 228 if initPID = containerInitPid(scanRoot, ctr.Runtime.Name, ns, id); initPID == -1 { 229 continue 230 } 231 img, err := imageStore.Get(ctx, ctr.Image) 232 if err != nil { 233 log.Errorf("Could not find the image for container %v, error: %v", id, err) 234 } 235 236 var lowerDir, upperDir, workDir string 237 // If the filesystem is overlayfs, then parse overlayfs metadata.db 238 if ctr.Snapshotter == "overlayfs" { 239 lowerDir, upperDir, workDir = collectDirs(scanRoot, snapshotsMetadata, ctr.SnapshotKey) 240 } 241 242 containersMetadata = append(containersMetadata, 243 Metadata{Namespace: ns, 244 ImageName: img.Name, 245 ImageDigest: img.Target.Digest.String(), 246 Runtime: ctr.Runtime.Name, 247 PodName: ctr.Labels["io.kubernetes.pod.name"], 248 PodNamespace: ctr.Labels["io.kubernetes.pod.namespace"], 249 ID: id, 250 PID: initPID, 251 Snapshotter: ctr.Snapshotter, 252 SnapshotKey: ctr.SnapshotKey, 253 LowerDir: lowerDir, 254 UpperDir: upperDir, 255 WorkDir: workDir}) 256 } 257 } 258 return containersMetadata, nil 259 } 260 261 // Trim the snapshot digest to match the snapshot key in the metadata.db file. 262 func digestSnapshotInfoMapping(snapshotsMetadata []SnapshotMetadata) map[string]SnapshotMetadata { 263 digestSnapshotInfoMapping := make(map[string]SnapshotMetadata) 264 for _, snapshotMetadata := range snapshotsMetadata { 265 // The snapshotMetadata.Digest is in the format of ".*/<digest>". 266 // The snapshotKey in the metadata.db file is the "<digest>" part. 267 // If the snapshotMetadata.Digest does not have the "/" or "/" is the last character, then it's 268 // not a valid snapshot digest. 269 digestSplitterIndex := strings.LastIndex(snapshotMetadata.Digest, "/") 270 if digestSplitterIndex == -1 || digestSplitterIndex == len(snapshotMetadata.Digest)-1 { 271 continue 272 } 273 shorterDigest := snapshotMetadata.Digest[digestSplitterIndex+1:] 274 digestSnapshotInfoMapping[shorterDigest] = snapshotMetadata 275 } 276 return digestSnapshotInfoMapping 277 } 278 279 // Format the lowerDir, upperDir and workDir for the container. 280 func collectDirs(scanRoot string, snapshotsMetadata []SnapshotMetadata, snapshotKey string) (string, string, string) { 281 var lowerDirs []string 282 var parentSnapshotIDs []uint64 283 parentSnapshotIDs = getParentSnapshotIDByDigest(snapshotsMetadata, snapshotKey, parentSnapshotIDs) 284 for _, parentSnapshotID := range parentSnapshotIDs { 285 lowerDirs = append(lowerDirs, filepath.Join(scanRoot, overlayfsSnapshotsPath, strconv.FormatUint(parentSnapshotID, 10), "fs")) 286 } 287 // Sample lowerDir: lowerdir=/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/15/fs:/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/12/fs:/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/8/fs:/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/5/fs 288 lowerDir := strings.Join(lowerDirs, ":") 289 for _, snapshotMetadata := range snapshotsMetadata { 290 if strings.Contains(snapshotMetadata.Digest, snapshotKey) { 291 upperDir := filepath.Join(scanRoot, overlayfsSnapshotsPath, strconv.FormatUint(snapshotMetadata.ID, 10), "fs") 292 workDir := filepath.Join(scanRoot, overlayfsSnapshotsPath, strconv.FormatUint(snapshotMetadata.ID, 10), "work") 293 return lowerDir, upperDir, workDir 294 } 295 } 296 return lowerDir, "", "" 297 } 298 299 // Collect the parent snapshot ids of the given snapshot. 300 func getParentSnapshotIDByDigest(snapshotsMetadata []SnapshotMetadata, digest string, parentIDList []uint64) []uint64 { 301 snapshotMetadataDict := digestSnapshotInfoMapping(snapshotsMetadata) 302 if _, ok := snapshotMetadataDict[digest]; !ok { 303 log.Errorf("Could not find the parent snapshot info in the metadata.db file for digest: %v", digest) 304 return parentIDList 305 } 306 parentSnapshotMetadata := snapshotMetadataDict[digest] 307 if strings.Contains(digest, "sha256:") { 308 // start from its parent snapshots. 309 parentIDList = append(parentIDList, parentSnapshotMetadata.ID) 310 } 311 if parentSnapshotMetadata.Parent == "" { 312 return parentIDList 313 } 314 shorterDigest := parentSnapshotMetadata.Parent[strings.LastIndex(snapshotMetadataDict[digest].Parent, "/")+1:] 315 return getParentSnapshotIDByDigest(snapshotsMetadata, shorterDigest, parentIDList) 316 } 317 318 // Parse the snapshots information from Metadata.db if db file is valid and not too large. 319 func snapshotsMetadataFromDB(fullMetadataDBPath string, maxMetaDBFileSize int64, fileSystemDriver string) ([]SnapshotMetadata, error) { 320 // extracted snapshots metadata from the metadata.db file. 321 var snapshotsMetadata []SnapshotMetadata 322 323 // Check if the file is valid to be opened, and make sure it's not too large. 324 err := fileSizeCheck(fullMetadataDBPath, maxMetaDBFileSize) 325 if err != nil { 326 return nil, fmt.Errorf("could not read the containerd metadb file: %w", err) 327 } 328 329 metadataDB, err := bolt.Open(fullMetadataDBPath, 0444, &bolt.Options{Timeout: 1 * time.Second}) 330 if err != nil { 331 return nil, fmt.Errorf("could not read the containerd metadb file: %w", err) 332 } 333 defer metadataDB.Close() 334 err = metadataDB.View(func(tx *bolt.Tx) error { 335 snapshotsBucketByDigest, err := snapshotsBucketByDigest(tx) 336 if err != nil { 337 return fmt.Errorf("not able to grab the names of the snapshot buckets: %w", err) 338 } 339 // Store the important info of the snapshots into snapshotMetadata struct. 340 snapshotsMetadata = snapshotMetadataFromSnapshotsBuckets(tx, snapshotsBucketByDigest, snapshotsMetadata, fileSystemDriver) 341 return nil 342 }) 343 if err != nil { 344 log.Errorf("Not able to view the db: %v", err) 345 return nil, err 346 } 347 return snapshotsMetadata, nil 348 } 349 350 // List the names of the snapshot buckets that are stored in the metadata.db file. 351 func snapshotsBucketByDigest(tx *bolt.Tx) ([]string, error) { 352 // List of bucket names.These buckets stores snapshots information. Normally its name 353 // is the digest. 354 var snapshotsBucketByDigest []string 355 // metadata db structure: v1-> snapshots -> <snapshot_digest> -> <snapshot_info_fields> 356 if tx == nil { 357 return snapshotsBucketByDigest, errors.New("the transaction is nil") 358 } 359 if tx.Bucket([]byte("v1")) == nil { 360 return snapshotsBucketByDigest, errors.New("could not find the v1 bucket in the metadata.db file") 361 } 362 if tx.Bucket([]byte("v1")).Bucket([]byte("snapshots")) == nil { 363 return snapshotsBucketByDigest, errors.New("could not find the snapshots bucket in the metadata.db file") 364 } 365 snapshotsMetadataBucket := tx.Bucket([]byte("v1")).Bucket([]byte("snapshots")) 366 err := snapshotsMetadataBucket.ForEach(func(k []byte, v []byte) error { 367 // When the value is nil, it means it's a bucket. In this case, we would like to grab the 368 // bucket name and visit it later. 369 if v == nil { 370 snapshotsBucketByDigest = append(snapshotsBucketByDigest, string(k)) 371 } 372 return nil 373 }) 374 return snapshotsBucketByDigest, err 375 } 376 377 func snapshotMetadataFromSnapshotsBuckets(tx *bolt.Tx, snapshotsBucketByDigest []string, snapshotsMetadata []SnapshotMetadata, fileSystemDriver string) []SnapshotMetadata { 378 for _, shaDigest := range snapshotsBucketByDigest { 379 if tx == nil { 380 return snapshotsMetadata 381 } 382 if tx.Bucket([]byte("v1")) == nil { 383 return snapshotsMetadata 384 } 385 if tx.Bucket([]byte("v1")).Bucket([]byte("snapshots")) == nil { 386 return snapshotsMetadata 387 } 388 if tx.Bucket([]byte("v1")).Bucket([]byte("snapshots")).Bucket([]byte(shaDigest)) == nil { 389 return snapshotsMetadata 390 } 391 // Get the bucket by digest. 392 snapshotMetadataBucket := tx.Bucket([]byte("v1")).Bucket([]byte("snapshots")).Bucket([]byte(shaDigest)) 393 // This id is the corresponding folder name in overlayfs/snapshots folder. 394 id := uint64(0) 395 idByte := snapshotMetadataBucket.Get([]byte("id")) 396 if idByte != nil { 397 id, _ = binary.Uvarint(idByte) 398 } 399 // The status of the snapshot. 400 kind := -1 401 kindByte := snapshotMetadataBucket.Get([]byte("kind")) 402 if kindByte != nil { 403 kind = int(kindByte[0]) 404 } 405 // The parent snapshot of the snapshot. 406 parent := "" 407 parentByte := snapshotMetadataBucket.Get([]byte("parent")) 408 if parentByte != nil { 409 parent = string(parentByte) 410 } 411 412 snapshotsMetadata = append(snapshotsMetadata, SnapshotMetadata{Digest: shaDigest, ID: id, Kind: kind, Parent: parent, FilesystemType: fileSystemDriver}) 413 } 414 return snapshotsMetadata 415 } 416 417 func containerInitPid(scanRoot string, runtimeName string, namespace string, id string) int { 418 // A typical Linux case. 419 if runtimeName == "io.containerd.runc.v2" { 420 return runcInitPid(scanRoot, id) 421 } 422 423 // A typical Windows case. 424 if runtimeName == "io.containerd.runhcs.v1" { 425 return runhcsInitPid(scanRoot, namespace, id) 426 } 427 428 return -1 429 } 430 431 func runcInitPid(scanRoot string, id string) int { 432 // If a container is running by runc, the init pid is stored in the grpc status file. 433 // status file is located at the 434 // <scanRoot>/<criPluginStatusFilePrefix>/<container_id>/status path. 435 statusPath := filepath.Join(scanRoot, criPluginStatusFilePrefix, id, "status") 436 if _, err := os.Stat(statusPath); err != nil { 437 log.Info("File status does not exists for container %v, error: %v", id, err) 438 return -1 439 } 440 441 err := fileSizeCheck(statusPath, defaultMaxFileSize) 442 if err != nil { 443 return -1 444 } 445 446 initPID := -1 447 448 statusContent, err := os.ReadFile(statusPath) 449 if err != nil { 450 log.Errorf("Could not read for %s status for container: %v", id, err) 451 return -1 452 } 453 var grpcContainerStatus map[string]*json.RawMessage 454 if err := json.Unmarshal(statusContent, &grpcContainerStatus); err != nil { 455 log.Errorf("Can't unmarshal status for container %v , error: %v", id, err) 456 return -1 457 } 458 459 if _, ok := grpcContainerStatus["Pid"]; !ok { 460 log.Errorf("Can't find field pid filed in status for container %v", id) 461 return -1 462 } 463 if err := json.Unmarshal(*grpcContainerStatus["Pid"], &initPID); err != nil { 464 log.Errorf("Can't unmarshal pid in status for container %v, error: %v", id, err) 465 return -1 466 } 467 468 return initPID 469 } 470 471 func runhcsInitPid(scanRoot string, namespace string, id string) int { 472 // If a container is running by runhcs, the init pid is stored in the runhcs shim.pid file. 473 // shim.pid file is located at the 474 // <scanRoot>/<runhcsStateFilePrefix>/<namespace_name>/<container_id>/shim.pid. 475 shimPIDPath := filepath.Join(scanRoot, runhcsStateFilePrefix, namespace, id, "shim.pid") 476 if _, err := os.Stat(shimPIDPath); err != nil { 477 log.Info("File shim.pid does not exists for container %v, error: %v", id, err) 478 return -1 479 } 480 481 shimPIDContent, err := os.ReadFile(shimPIDPath) 482 if err != nil { 483 log.Errorf("Could not read for %s shim.pid for container: %v", id, err) 484 return -1 485 } 486 shimPidStr := strings.TrimSpace(string(shimPIDContent)) 487 initPID, err := strconv.Atoi(shimPidStr) 488 if err != nil { 489 log.Errorf("Can't convert shim.pid content to int for container %v, error: %v", id, err) 490 return -1 491 } 492 return initPID 493 }