github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/containers/k8simage/k8simage.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package k8simage extracts container image references from Kubernetes YAML files.
    16  package k8simage
    17  
    18  import (
    19  	"context"
    20  	"errors"
    21  	"fmt"
    22  	"io"
    23  	"path/filepath"
    24  	"strings"
    25  
    26  	"github.com/google/osv-scalibr/extractor"
    27  	"github.com/google/osv-scalibr/extractor/filesystem"
    28  	"github.com/google/osv-scalibr/extractor/filesystem/internal/units"
    29  	"github.com/google/osv-scalibr/inventory"
    30  	"github.com/google/osv-scalibr/log"
    31  	"github.com/google/osv-scalibr/plugin"
    32  	"github.com/google/osv-scalibr/purl"
    33  	"github.com/google/osv-scalibr/stats"
    34  	"gopkg.in/yaml.v3"
    35  )
    36  
const (
	// Name is the unique name of this extractor. It is the value returned by
	// Extractor.Name.
	Name = "containers/k8simage"

	// DefaultMaxFileSizeBytes is the default maximum file size the extractor will
	// attempt to process (1 MiB). It is the limit installed by DefaultConfig;
	// files larger than the configured limit are skipped during extraction.
	DefaultMaxFileSizeBytes = 1 * units.MiB
)
    46  
// k8sResource is the decoding target for one YAML document. It models only the
// fields needed for image extraction: the apiVersion and kind keys that are
// used to recognize a document as a Kubernetes resource, and the spec section
// that holds the container lists.
type k8sResource struct {
	// APIVersion and Kind must both be non-empty for the document to be
	// accepted as a Kubernetes resource.
	APIVersion string   `yaml:"apiVersion"`
	Kind       string   `yaml:"kind"`
	// Spec is a pointer so that resources without a spec section can be
	// detected via a nil check.
	Spec       *k8sSpec `yaml:"spec,omitempty"`
}
    53  
// k8sSpec represents the top-level spec section of a Kubernetes resource.
// The same struct serves every supported resource shape: container lists
// placed directly under spec, a pod template under spec.template, and a job
// template under spec.jobTemplate. Sections that are absent from the YAML are
// left as nil / empty.
type k8sSpec struct {
	// Containers and InitContainers are the lists found directly at
	// spec.containers and spec.initContainers.
	Containers     []container  `yaml:"containers,omitempty"`
	InitContainers []container  `yaml:"initContainers,omitempty"`
	// Template is the pod template found at spec.template.
	Template       *podTemplate `yaml:"template,omitempty"`
	// JobTemplate is the job template found at spec.jobTemplate.
	JobTemplate    *jobTemplate `yaml:"jobTemplate,omitempty"`
}
    61  
// jobTemplate represents a job template in CronJob resources
// (spec.jobTemplate). It wraps the job spec that in turn carries the pod
// template.
type jobTemplate struct {
	// Spec is nil when the template has no spec section.
	Spec *jobSpec `yaml:"spec,omitempty"`
}
    66  
// jobSpec represents the spec of a Job as nested inside a jobTemplate. Only
// the pod template is modeled.
type jobSpec struct {
	// Template is nil when the job spec has no template section.
	Template *podTemplate `yaml:"template,omitempty"`
}
    71  
// podTemplate represents a pod template in Kubernetes resources. It is used
// both directly under a resource spec and under a job spec.
type podTemplate struct {
	// Spec is nil when the template has no spec section.
	Spec *podSpec `yaml:"spec,omitempty"`
}
    76  
// podSpec represents a pod specification nested inside a pod template. Only
// the regular and init container lists are modeled.
type podSpec struct {
	Containers     []container `yaml:"containers,omitempty"`
	InitContainers []container `yaml:"initContainers,omitempty"`
}
    82  
// container represents a container specification in Kubernetes. Only the
// image reference is decoded; all other container fields are ignored.
type container struct {
	// Image is the raw image reference string, e.g. a repository with an
	// optional tag or digest. It is empty when the key is missing.
	Image string `yaml:"image"`
}
    87  
// Config is the configuration for the Extractor. It is consumed by New, which
// copies both fields into the returned Extractor.
type Config struct {
	// Stats is a stats collector for reporting metrics.
	// NOTE(review): the collector is stored by New but is not referenced by any
	// of the extractor methods in this file — confirm whether metrics reporting
	// is still to be wired in.
	Stats stats.Collector
	// MaxFileSizeBytes is the maximum file size this extractor will unmarshal.
	// The limit is enforced in Extract (not in FileRequired): Extract compares
	// the input file size against this value and skips files that exceed it.
	MaxFileSizeBytes int64
}
    96  
    97  // DefaultConfig returns the default configuration for the extractor.
    98  func DefaultConfig() Config {
    99  	return Config{
   100  		MaxFileSizeBytes: DefaultMaxFileSizeBytes,
   101  	}
   102  }
   103  
// Extractor extracts container image references from Kubernetes YAML files.
// Instances are created with New or NewDefault.
type Extractor struct {
	// stats is the collector copied from Config.Stats.
	stats            stats.Collector
	// maxFileSizeBytes is the size limit copied from Config.MaxFileSizeBytes;
	// Extract compares each input file's size against it.
	maxFileSizeBytes int64
}
   109  
   110  // New returns a Kubernetes container image extractor.
   111  //
   112  // For most use cases, initialize with:
   113  // ```
   114  // e := New(DefaultConfig())
   115  // ```
   116  func New(cfg Config) *Extractor {
   117  	return &Extractor{
   118  		stats:            cfg.Stats,
   119  		maxFileSizeBytes: cfg.MaxFileSizeBytes,
   120  	}
   121  }
   122  
   123  // NewDefault returns an extractor with the default config settings.
   124  func NewDefault() filesystem.Extractor { return New(DefaultConfig()) }
   125  
   126  // Name of the extractor.
   127  func (e Extractor) Name() string { return Name }
   128  
   129  // Version of the extractor.
   130  func (e Extractor) Version() int { return 0 }
   131  
   132  // Requirements of the extractor.
   133  func (e Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{} }
   134  
   135  // FileRequired returns true if the specified file looks like a Kubernetes YAML file.
   136  // It determines if the specified file is a Kubernetes YAML file that should be processed
   137  // by checking the file extension (.yaml or .yml).
   138  func (e Extractor) FileRequired(api filesystem.FileAPI) bool {
   139  	// Only consider YAML/YML files
   140  	path := api.Path()
   141  	ext := strings.ToLower(filepath.Ext(path))
   142  	return ext == ".yaml" || ext == ".yml"
   143  }
   144  
   145  // Extract extracts container image references from a K8s configuration file.
   146  func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) {
   147  	if input.Info == nil {
   148  		return inventory.Inventory{}, errors.New("input.Info is nil")
   149  	}
   150  	if input.Info.Size() > e.maxFileSizeBytes {
   151  		// Skip file that exceeds size limit.
   152  		log.Infof("Skipping too large file: %s", input.Path)
   153  		return inventory.Inventory{}, nil
   154  	}
   155  
   156  	images, err := parseK8sYAML(ctx, input.Reader)
   157  	if err != nil {
   158  		// Not a K8s YAML file.
   159  		//nolint:nilerr
   160  		return inventory.Inventory{}, nil
   161  	}
   162  
   163  	var pkgs []*extractor.Package
   164  	for _, image := range images {
   165  		name, version := parseName(image)
   166  		pkgs = append(pkgs, &extractor.Package{
   167  			Locations: []string{input.Path},
   168  			Name:      name,
   169  			Version:   version,
   170  			PURLType:  purl.TypeK8s,
   171  		})
   172  	}
   173  
   174  	return inventory.Inventory{Packages: pkgs}, nil
   175  }
   176  
   177  // parseName parses a container image name to extract the name and version/digest.
   178  // It handles both digest (@sha256:...) and tag (:tag) formats.
   179  // See: https://kubernetes.io/docs/concepts/containers/images/#image-pull-policy
   180  func parseName(name string) (string, string) {
   181  	// Handle digest format (tag@HashType:HashValue)
   182  	if strings.Contains(name, "@") {
   183  		parts := strings.SplitN(name, "@", 2)
   184  		return parts[0], parts[1]
   185  	}
   186  	// Handle tag format (registry:port/namespace/image:tag)
   187  	// Use LastIndex to find the rightmost colon which separates the tag
   188  	if lastColonIndex := strings.LastIndex(name, ":"); lastColonIndex != -1 {
   189  		return name[:lastColonIndex], name[lastColonIndex+1:]
   190  	}
   191  
   192  	return name, "latest"
   193  }
   194  
   195  // parseK8sYAML extracts container images from Kubernetes YAML documents.
   196  // It supports multi-document YAML files and validates that each document
   197  // contains the required apiVersion and kind fields.
   198  func parseK8sYAML(ctx context.Context, r io.Reader) ([]string, error) {
   199  	decoder := yaml.NewDecoder(r)
   200  	var images []string
   201  	for {
   202  		// Check for context cancellation during parsing
   203  		if err := ctx.Err(); err != nil {
   204  			return images, fmt.Errorf("parseK8sYAML halted due to context error: %w", err)
   205  		}
   206  
   207  		// Parse each YAML document in the file
   208  		var doc k8sResource
   209  		if err := decoder.Decode(&doc); err != nil {
   210  			if errors.Is(err, io.EOF) {
   211  				break
   212  			}
   213  			return nil, fmt.Errorf("failed to parse Kubernetes YAML: %w", err)
   214  		}
   215  		// Check if the document is a Kubernetes resource by checking for "apiVersion" and "kind" fields
   216  		if doc.APIVersion == "" || doc.Kind == "" {
   217  			return nil, errors.New("not a Kubernetes configuration file: missing 'apiVersion' or 'kind'")
   218  		}
   219  		// Extract images from the document
   220  		extractedImages := extractImagesFromK8sResource(&doc)
   221  		images = append(images, extractedImages...)
   222  	}
   223  
   224  	return images, nil
   225  }
   226  
   227  // extractImagesFromK8sResource extracts container images from a Kubernetes resource.
   228  // It handles various resource types including Pods, Deployments, StatefulSets, Jobs, and CronJobs.
   229  func extractImagesFromK8sResource(doc *k8sResource) []string {
   230  	var images []string
   231  
   232  	if doc.Spec == nil {
   233  		return images
   234  	}
   235  
   236  	// Check for direct containers at spec.containers
   237  	images = append(images, getImagesFromContainerList(doc.Spec.Containers)...)
   238  	// Handle initContainers
   239  	images = append(images, getImagesFromContainerList(doc.Spec.InitContainers)...)
   240  
   241  	// Check for template-based resources (Deployments, StatefulSets, etc.)
   242  	if doc.Spec.Template != nil && doc.Spec.Template.Spec != nil {
   243  		images = append(images, getImagesFromContainerList(doc.Spec.Template.Spec.Containers)...)
   244  		images = append(images, getImagesFromContainerList(doc.Spec.Template.Spec.InitContainers)...)
   245  	}
   246  
   247  	// Handle CronJob/Job templates
   248  	if doc.Spec.JobTemplate != nil && doc.Spec.JobTemplate.Spec != nil &&
   249  		doc.Spec.JobTemplate.Spec.Template != nil && doc.Spec.JobTemplate.Spec.Template.Spec != nil {
   250  		images = append(images, getImagesFromContainerList(doc.Spec.JobTemplate.Spec.Template.Spec.Containers)...)
   251  		images = append(images, getImagesFromContainerList(doc.Spec.JobTemplate.Spec.Template.Spec.InitContainers)...)
   252  	}
   253  
   254  	return images
   255  }
   256  
   257  // getImagesFromContainerList extracts image references from a list of containers,
   258  // filtering out any containers with empty image fields.
   259  func getImagesFromContainerList(containers []container) []string {
   260  	var images []string
   261  	for _, container := range containers {
   262  		if container.Image != "" {
   263  			images = append(images, container.Image)
   264  		}
   265  	}
   266  	return images
   267  }