github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/containers/dockerbaseimage/dockerbaseimage.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package dockerbaseimage extracts base image urls from Dockerfiles. 16 package dockerbaseimage 17 18 import ( 19 "context" 20 "errors" 21 "fmt" 22 "io" 23 "path/filepath" 24 "slices" 25 "strings" 26 27 "github.com/google/osv-scalibr/extractor" 28 "github.com/google/osv-scalibr/extractor/filesystem" 29 "github.com/google/osv-scalibr/extractor/filesystem/internal/units" 30 "github.com/google/osv-scalibr/inventory" 31 "github.com/google/osv-scalibr/log" 32 "github.com/google/osv-scalibr/plugin" 33 "github.com/google/osv-scalibr/purl" 34 "github.com/google/osv-scalibr/stats" 35 "github.com/moby/buildkit/frontend/dockerfile/linter" 36 37 mbi "github.com/moby/buildkit/frontend/dockerfile/instructions" 38 mbp "github.com/moby/buildkit/frontend/dockerfile/parser" 39 ) 40 41 const ( 42 // Name is the unique name of this extractor. 43 Name = "containers/dockerbaseimage" 44 45 // DefaultMaxFileSizeBytes is the default maximum file size the extractor will 46 // attempt to extract. If a file is encountered that is larger than this 47 // limit, the file is ignored by `FileRequired`. 48 DefaultMaxFileSizeBytes = 1 * units.MiB 49 ) 50 51 var ( 52 // dockerBaseContainers is a list of reserved terms/base containers that can be used within a 53 // Dockerfile (e.g. "scratch" is Docker's reserved, minimal image) and require special handling. 54 dockerBaseContainers = []string{"scratch"} 55 ) 56 57 // Config is the configuration for the Extractor. 58 type Config struct { 59 // Stats is a stats collector for reporting metrics. 60 Stats stats.Collector 61 // MaxFileSizeBytes is the maximum file size this extractor will unmarshal. If 62 // `FileRequired` gets a bigger file, it will return false, 63 MaxFileSizeBytes int64 64 } 65 66 // DefaultConfig returns the default configuration for the extractor. 67 func DefaultConfig() Config { 68 return Config{ 69 MaxFileSizeBytes: DefaultMaxFileSizeBytes, 70 } 71 } 72 73 // Extractor extracts repository URLs from Dockerfiles. 74 type Extractor struct { 75 stats stats.Collector 76 maxFileSizeBytes int64 77 } 78 79 // New returns a Dockerfile repository extractor. 80 // 81 // For most use cases, initialize with: 82 // ``` 83 // e := New(DefaultConfig()) 84 // ``` 85 func New(cfg Config) *Extractor { 86 return &Extractor{ 87 stats: cfg.Stats, 88 maxFileSizeBytes: cfg.MaxFileSizeBytes, 89 } 90 } 91 92 // NewDefault returns an extractor with the default config settings. 93 func NewDefault() filesystem.Extractor { return New(DefaultConfig()) } 94 95 // Name of the extractor. 96 func (e Extractor) Name() string { return Name } 97 98 // Version of the extractor. 99 func (e Extractor) Version() int { return 0 } 100 101 // FileRequired returns true if the specified file matches Dockerfile. 102 func (e Extractor) FileRequired(api filesystem.FileAPI) bool { 103 fileName := filepath.Base(api.Path()) 104 ext := filepath.Ext(fileName) 105 baseName := strings.TrimSuffix(fileName, ext) 106 return strings.ToLower(baseName) == "dockerfile" || strings.ToLower(ext) == ".dockerfile" 107 } 108 109 // Extract extracts base image urls from a Dockerfile. 110 func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) { 111 if input.Info == nil { 112 return inventory.Inventory{}, errors.New("input.Info is nil") 113 } 114 if input.Info.Size() > e.maxFileSizeBytes { 115 // Skipping too large file. 116 log.Infof("Skipping too large file: %s", input.Path) 117 return inventory.Inventory{}, nil 118 } 119 120 stages, args, err := parse(input.Reader) 121 if err != nil { 122 log.Warnf("Parsing error: %v", err) 123 return inventory.Inventory{}, err 124 } 125 126 argsMap := toMap(args) 127 baseContainers := uniqueContainers(stages) 128 129 var pkgs []*extractor.Package 130 for _, container := range baseContainers { 131 resolvedName := resolveName(container, argsMap) 132 133 name, version := parseName(resolvedName) 134 135 pkgs = append(pkgs, &extractor.Package{ 136 Locations: []string{input.Path}, 137 Name: name, 138 Version: version, 139 PURLType: purl.TypeDocker, 140 }) 141 } 142 143 return inventory.Inventory{Packages: pkgs}, nil 144 } 145 146 // Requirements of the extractor. 147 func (e Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{} } 148 149 func resolveName(name string, argsMap map[string]string) string { 150 if !strings.HasPrefix(name, "$") { 151 return name 152 } 153 resolved := argsMap[strings.Trim(name, "${}")] 154 if resolved == "" { 155 return name 156 } 157 return resolved 158 } 159 160 func parseName(name string) (string, string) { 161 if strings.Contains(name, "@") { 162 parts := strings.SplitN(name, "@", 2) 163 return parts[0], parts[1] 164 } 165 166 if strings.Contains(name, ":") { 167 parts := strings.SplitN(name, ":", 2) 168 return parts[0], parts[1] 169 } 170 171 return name, "latest" 172 } 173 174 func toMap(args []mbi.ArgCommand) map[string]string { 175 m := make(map[string]string) 176 for _, arg := range args { 177 for _, arg := range arg.Args { 178 if arg.Value != nil { 179 m[arg.Key] = *arg.Value 180 } 181 } 182 } 183 return m 184 } 185 186 func uniqueContainers(stages []mbi.Stage) []string { 187 stagesSeen := make(map[string]bool) 188 containersSeen := make(map[string]bool) 189 var baseContainers []string 190 for _, stage := range stages { 191 if slices.Contains(dockerBaseContainers, stage.BaseName) { 192 // Skip base containers that are reserved or special values. 193 continue 194 } 195 stagesSeen[stage.Name] = true 196 if stagesSeen[stage.BaseName] { 197 continue 198 } 199 baseContainer := stage.BaseName 200 if containersSeen[baseContainer] { 201 continue 202 } 203 baseContainers = append(baseContainers, baseContainer) 204 containersSeen[baseContainer] = true 205 } 206 return baseContainers 207 } 208 209 func parse(r io.Reader) ([]mbi.Stage, []mbi.ArgCommand, error) { 210 p, err := mbp.Parse(r) 211 if err != nil { 212 return nil, nil, fmt.Errorf("failed to parse dockerfile: %w", err) 213 } 214 215 return mbi.Parse(p.AST, linter.New(&linter.Config{})) 216 }