github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/containers/k8simage/k8simage.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package k8simage extracts container image references from Kubernetes YAML files. 16 package k8simage 17 18 import ( 19 "context" 20 "errors" 21 "fmt" 22 "io" 23 "path/filepath" 24 "strings" 25 26 "github.com/google/osv-scalibr/extractor" 27 "github.com/google/osv-scalibr/extractor/filesystem" 28 "github.com/google/osv-scalibr/extractor/filesystem/internal/units" 29 "github.com/google/osv-scalibr/inventory" 30 "github.com/google/osv-scalibr/log" 31 "github.com/google/osv-scalibr/plugin" 32 "github.com/google/osv-scalibr/purl" 33 "github.com/google/osv-scalibr/stats" 34 "gopkg.in/yaml.v3" 35 ) 36 37 const ( 38 // Name is the unique name of this extractor. 39 Name = "containers/k8simage" 40 41 // DefaultMaxFileSizeBytes is the default maximum file size the extractor will 42 // attempt to process. If a file is encountered that is larger than this 43 // limit, the file is skipped during processing. 44 DefaultMaxFileSizeBytes = 1 * units.MiB 45 ) 46 47 // k8sResource represents a Kubernetes resource with the fields needed for image extraction. 48 type k8sResource struct { 49 APIVersion string `yaml:"apiVersion"` 50 Kind string `yaml:"kind"` 51 Spec *k8sSpec `yaml:"spec,omitempty"` 52 } 53 54 // k8sSpec represents the spec section of a Kubernetes resource. 55 type k8sSpec struct { 56 Containers []container `yaml:"containers,omitempty"` 57 InitContainers []container `yaml:"initContainers,omitempty"` 58 Template *podTemplate `yaml:"template,omitempty"` 59 JobTemplate *jobTemplate `yaml:"jobTemplate,omitempty"` 60 } 61 62 // jobTemplate represents a job template in CronJob resources. 63 type jobTemplate struct { 64 Spec *jobSpec `yaml:"spec,omitempty"` 65 } 66 67 // jobSpec represents the spec of a Job. 68 type jobSpec struct { 69 Template *podTemplate `yaml:"template,omitempty"` 70 } 71 72 // podTemplate represents a pod template in Kubernetes resources. 73 type podTemplate struct { 74 Spec *podSpec `yaml:"spec,omitempty"` 75 } 76 77 // podSpec represents a pod specification. 78 type podSpec struct { 79 Containers []container `yaml:"containers,omitempty"` 80 InitContainers []container `yaml:"initContainers,omitempty"` 81 } 82 83 // container represents a container specification in Kubernetes. 84 type container struct { 85 Image string `yaml:"image"` 86 } 87 88 // Config is the configuration for the Extractor. 89 type Config struct { 90 // Stats is a stats collector for reporting metrics. 91 Stats stats.Collector 92 // MaxFileSizeBytes is the maximum file size this extractor will unmarshal. If 93 // `FileRequired` receives a larger file, it will return false. 94 MaxFileSizeBytes int64 95 } 96 97 // DefaultConfig returns the default configuration for the extractor. 98 func DefaultConfig() Config { 99 return Config{ 100 MaxFileSizeBytes: DefaultMaxFileSizeBytes, 101 } 102 } 103 104 // Extractor extracts container image references from Kubernetes YAML files. 105 type Extractor struct { 106 stats stats.Collector 107 maxFileSizeBytes int64 108 } 109 110 // New returns a Kubernetes container image extractor. 111 // 112 // For most use cases, initialize with: 113 // ``` 114 // e := New(DefaultConfig()) 115 // ``` 116 func New(cfg Config) *Extractor { 117 return &Extractor{ 118 stats: cfg.Stats, 119 maxFileSizeBytes: cfg.MaxFileSizeBytes, 120 } 121 } 122 123 // NewDefault returns an extractor with the default config settings. 124 func NewDefault() filesystem.Extractor { return New(DefaultConfig()) } 125 126 // Name of the extractor. 127 func (e Extractor) Name() string { return Name } 128 129 // Version of the extractor. 130 func (e Extractor) Version() int { return 0 } 131 132 // Requirements of the extractor. 133 func (e Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{} } 134 135 // FileRequired returns true if the specified file looks like a Kubernetes YAML file. 136 // It determines if the specified file is a Kubernetes YAML file that should be processed 137 // by checking the file extension (.yaml or .yml). 138 func (e Extractor) FileRequired(api filesystem.FileAPI) bool { 139 // Only consider YAML/YML files 140 path := api.Path() 141 ext := strings.ToLower(filepath.Ext(path)) 142 return ext == ".yaml" || ext == ".yml" 143 } 144 145 // Extract extracts container image references from a K8s configuration file. 146 func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) { 147 if input.Info == nil { 148 return inventory.Inventory{}, errors.New("input.Info is nil") 149 } 150 if input.Info.Size() > e.maxFileSizeBytes { 151 // Skip file that exceeds size limit. 152 log.Infof("Skipping too large file: %s", input.Path) 153 return inventory.Inventory{}, nil 154 } 155 156 images, err := parseK8sYAML(ctx, input.Reader) 157 if err != nil { 158 // Not a K8s YAML file. 159 //nolint:nilerr 160 return inventory.Inventory{}, nil 161 } 162 163 var pkgs []*extractor.Package 164 for _, image := range images { 165 name, version := parseName(image) 166 pkgs = append(pkgs, &extractor.Package{ 167 Locations: []string{input.Path}, 168 Name: name, 169 Version: version, 170 PURLType: purl.TypeK8s, 171 }) 172 } 173 174 return inventory.Inventory{Packages: pkgs}, nil 175 } 176 177 // parseName parses a container image name to extract the name and version/digest. 178 // It handles both digest (@sha256:...) and tag (:tag) formats. 179 // See: https://kubernetes.io/docs/concepts/containers/images/#image-pull-policy 180 func parseName(name string) (string, string) { 181 // Handle digest format (tag@HashType:HashValue) 182 if strings.Contains(name, "@") { 183 parts := strings.SplitN(name, "@", 2) 184 return parts[0], parts[1] 185 } 186 // Handle tag format (registry:port/namespace/image:tag) 187 // Use LastIndex to find the rightmost colon which separates the tag 188 if lastColonIndex := strings.LastIndex(name, ":"); lastColonIndex != -1 { 189 return name[:lastColonIndex], name[lastColonIndex+1:] 190 } 191 192 return name, "latest" 193 } 194 195 // parseK8sYAML extracts container images from Kubernetes YAML documents. 196 // It supports multi-document YAML files and validates that each document 197 // contains the required apiVersion and kind fields. 198 func parseK8sYAML(ctx context.Context, r io.Reader) ([]string, error) { 199 decoder := yaml.NewDecoder(r) 200 var images []string 201 for { 202 // Check for context cancellation during parsing 203 if err := ctx.Err(); err != nil { 204 return images, fmt.Errorf("parseK8sYAML halted due to context error: %w", err) 205 } 206 207 // Parse each YAML document in the file 208 var doc k8sResource 209 if err := decoder.Decode(&doc); err != nil { 210 if errors.Is(err, io.EOF) { 211 break 212 } 213 return nil, fmt.Errorf("failed to parse Kubernetes YAML: %w", err) 214 } 215 // Check if the document is a Kubernetes resource by checking for "apiVersion" and "kind" fields 216 if doc.APIVersion == "" || doc.Kind == "" { 217 return nil, errors.New("not a Kubernetes configuration file: missing 'apiVersion' or 'kind'") 218 } 219 // Extract images from the document 220 extractedImages := extractImagesFromK8sResource(&doc) 221 images = append(images, extractedImages...) 222 } 223 224 return images, nil 225 } 226 227 // extractImagesFromK8sResource extracts container images from a Kubernetes resource. 228 // It handles various resource types including Pods, Deployments, StatefulSets, Jobs, and CronJobs. 229 func extractImagesFromK8sResource(doc *k8sResource) []string { 230 var images []string 231 232 if doc.Spec == nil { 233 return images 234 } 235 236 // Check for direct containers at spec.containers 237 images = append(images, getImagesFromContainerList(doc.Spec.Containers)...) 238 // Handle initContainers 239 images = append(images, getImagesFromContainerList(doc.Spec.InitContainers)...) 240 241 // Check for template-based resources (Deployments, StatefulSets, etc.) 242 if doc.Spec.Template != nil && doc.Spec.Template.Spec != nil { 243 images = append(images, getImagesFromContainerList(doc.Spec.Template.Spec.Containers)...) 244 images = append(images, getImagesFromContainerList(doc.Spec.Template.Spec.InitContainers)...) 245 } 246 247 // Handle CronJob/Job templates 248 if doc.Spec.JobTemplate != nil && doc.Spec.JobTemplate.Spec != nil && 249 doc.Spec.JobTemplate.Spec.Template != nil && doc.Spec.JobTemplate.Spec.Template.Spec != nil { 250 images = append(images, getImagesFromContainerList(doc.Spec.JobTemplate.Spec.Template.Spec.Containers)...) 251 images = append(images, getImagesFromContainerList(doc.Spec.JobTemplate.Spec.Template.Spec.InitContainers)...) 252 } 253 254 return images 255 } 256 257 // getImagesFromContainerList extracts image references from a list of containers, 258 // filtering out any containers with empty image fields. 259 func getImagesFromContainerList(containers []container) []string { 260 var images []string 261 for _, container := range containers { 262 if container.Image != "" { 263 images = append(images, container.Image) 264 } 265 } 266 return images 267 }