github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/binary/classifier.go (about) 1 package binary 2 3 import ( 4 "bytes" 5 "debug/elf" 6 "debug/macho" 7 "debug/pe" 8 "encoding/json" 9 "fmt" 10 "io" 11 "regexp" 12 "strings" 13 "text/template" 14 15 "github.com/anchore/packageurl-go" 16 "github.com/anchore/syft/internal" 17 "github.com/anchore/syft/internal/log" 18 "github.com/anchore/syft/syft/cpe" 19 "github.com/anchore/syft/syft/file" 20 "github.com/anchore/syft/syft/pkg" 21 ) 22 23 // Classifier is a generic package classifier that can be used to match a package definition 24 // to a file that meets the given content criteria of the EvidenceMatcher. 25 type Classifier struct { 26 Class string `json:"class"` 27 28 // FileGlob is a selector to narrow down file inspection using the **/glob* syntax 29 FileGlob string `json:"fileGlob"` 30 31 // EvidenceMatcher is what will be used to match against the file in the source 32 // location. If the matcher returns a package, the file will be considered a candidate. 33 EvidenceMatcher EvidenceMatcher `json:"-"` 34 35 // Information below is used to specify the Package information when returned 36 37 // Package is the name to use for the package 38 Package string `json:"package"` 39 40 // PURL is the Package URL to use when generating a package 41 PURL packageurl.PackageURL `json:"purl"` 42 43 // CPEs are the specific CPEs we want to include for this binary with updated version information 44 CPEs []cpe.CPE `json:"cpes"` 45 } 46 47 func (cfg Classifier) MarshalJSON() ([]byte, error) { 48 type marshalled struct { 49 Class string `json:"class"` 50 FileGlob string `json:"fileGlob"` 51 Package string `json:"package"` 52 PURL string `json:"purl"` 53 CPEs []string `json:"cpes"` 54 } 55 56 var marshalledCPEs []string 57 for _, c := range cfg.CPEs { 58 marshalledCPEs = append(marshalledCPEs, c.Attributes.BindToFmtString()) 59 } 60 61 m := marshalled{ 62 Class: cfg.Class, 63 FileGlob: cfg.FileGlob, 64 Package: cfg.Package, 65 PURL: cfg.PURL.String(), 66 CPEs: marshalledCPEs, 67 } 68 69 return json.Marshal(m) 70 } 71 72 // EvidenceMatcher is a function called to catalog Packages that match some sort of evidence 73 type EvidenceMatcher func(resolver file.Resolver, classifier Classifier, location file.Location) ([]pkg.Package, error) 74 75 func evidenceMatchers(matchers ...EvidenceMatcher) EvidenceMatcher { 76 return func(resolver file.Resolver, classifier Classifier, location file.Location) ([]pkg.Package, error) { 77 for _, matcher := range matchers { 78 match, err := matcher(resolver, classifier, location) 79 if err != nil { 80 return nil, err 81 } 82 if match != nil { 83 return match, nil 84 } 85 } 86 return nil, nil 87 } 88 } 89 90 func fileNameTemplateVersionMatcher(fileNamePattern string, contentTemplate string) EvidenceMatcher { 91 pat := regexp.MustCompile(fileNamePattern) 92 return func(resolver file.Resolver, classifier Classifier, location file.Location) ([]pkg.Package, error) { 93 if !pat.MatchString(location.RealPath) { 94 return nil, nil 95 } 96 97 filepathNamedGroupValues := internal.MatchNamedCaptureGroups(pat, location.RealPath) 98 99 // versions like 3.5 should not match any character, but explicit dot 100 for k, v := range filepathNamedGroupValues { 101 filepathNamedGroupValues[k] = strings.ReplaceAll(v, ".", "\\.") 102 } 103 104 tmpl, err := template.New("").Parse(contentTemplate) 105 if err != nil { 106 return nil, fmt.Errorf("unable to parse classifier template=%q : %w", contentTemplate, err) 107 } 108 109 patternBuf := &bytes.Buffer{} 110 err = tmpl.Execute(patternBuf, filepathNamedGroupValues) 111 if err != nil { 112 return nil, fmt.Errorf("unable to render template: %w", err) 113 } 114 115 tmplPattern, err := regexp.Compile(patternBuf.String()) 116 if err != nil { 117 return nil, fmt.Errorf("unable to compile rendered regex=%q: %w", patternBuf.String(), err) 118 } 119 120 contents, err := getContents(resolver, location) 121 if err != nil { 122 return nil, fmt.Errorf("unable to get read contents for file: %w", err) 123 } 124 125 matchMetadata := internal.MatchNamedCaptureGroups(tmplPattern, string(contents)) 126 127 p := newClassifierPackage(classifier, location, matchMetadata) 128 if p == nil { 129 return nil, nil 130 } 131 132 return []pkg.Package{*p}, nil 133 } 134 } 135 136 func FileContentsVersionMatcher(pattern string) EvidenceMatcher { 137 pat := regexp.MustCompile(pattern) 138 return func(resolver file.Resolver, classifier Classifier, location file.Location) ([]pkg.Package, error) { 139 contents, err := getContents(resolver, location) 140 if err != nil { 141 return nil, fmt.Errorf("unable to get read contents for file: %w", err) 142 } 143 144 matchMetadata := internal.MatchNamedCaptureGroups(pat, string(contents)) 145 146 p := newClassifierPackage(classifier, location, matchMetadata) 147 if p == nil { 148 return nil, nil 149 } 150 151 return []pkg.Package{*p}, nil 152 } 153 } 154 155 // matchExcluding tests the provided regular expressions against the file, and if matched, DOES NOT return 156 // anything that the matcher would otherwise return 157 func matchExcluding(matcher EvidenceMatcher, contentPatternsToExclude ...string) EvidenceMatcher { 158 var nonMatchPatterns []*regexp.Regexp 159 for _, p := range contentPatternsToExclude { 160 nonMatchPatterns = append(nonMatchPatterns, regexp.MustCompile(p)) 161 } 162 return func(resolver file.Resolver, classifier Classifier, location file.Location) ([]pkg.Package, error) { 163 contents, err := getContents(resolver, location) 164 if err != nil { 165 return nil, fmt.Errorf("unable to get read contents for file: %w", err) 166 } 167 for _, nonMatch := range nonMatchPatterns { 168 if nonMatch.Match(contents) { 169 return nil, nil 170 } 171 } 172 return matcher(resolver, classifier, location) 173 } 174 } 175 176 //nolint:gocognit 177 func sharedLibraryLookup(sharedLibraryPattern string, sharedLibraryMatcher EvidenceMatcher) EvidenceMatcher { 178 pat := regexp.MustCompile(sharedLibraryPattern) 179 return func(resolver file.Resolver, classifier Classifier, location file.Location) (packages []pkg.Package, _ error) { 180 libs, err := sharedLibraries(resolver, location) 181 if err != nil { 182 return nil, err 183 } 184 for _, lib := range libs { 185 if !pat.MatchString(lib) { 186 continue 187 } 188 189 locations, err := resolver.FilesByGlob("**/" + lib) 190 if err != nil { 191 return nil, err 192 } 193 for _, libraryLocation := range locations { 194 pkgs, err := sharedLibraryMatcher(resolver, classifier, libraryLocation) 195 if err != nil { 196 return nil, err 197 } 198 for _, p := range pkgs { 199 // set the source binary as the first location 200 locationSet := file.NewLocationSet(location) 201 locationSet.Add(p.Locations.ToSlice()...) 202 p.Locations = locationSet 203 meta, _ := p.Metadata.(pkg.BinarySignature) 204 p.Metadata = pkg.BinarySignature{ 205 Matches: append([]pkg.ClassifierMatch{ 206 { 207 Classifier: classifier.Class, 208 Location: location, 209 }, 210 }, meta.Matches...), 211 } 212 packages = append(packages, p) 213 } 214 } 215 } 216 return packages, nil 217 } 218 } 219 220 func mustPURL(purl string) packageurl.PackageURL { 221 p, err := packageurl.FromString(purl) 222 if err != nil { 223 panic(fmt.Sprintf("invalid PURL: %s", p)) 224 } 225 return p 226 } 227 228 func getContents(resolver file.Resolver, location file.Location) ([]byte, error) { 229 reader, err := resolver.FileContentsByLocation(location) 230 if err != nil { 231 return nil, err 232 } 233 defer internal.CloseAndLogError(reader, location.AccessPath) 234 235 // TODO: there may be room for improvement here, as this may use an excessive amount of memory. Alternate approach is to leverage a RuneReader. 236 contents, err := io.ReadAll(reader) 237 if err != nil { 238 return nil, fmt.Errorf("unable to get contents for file: %w", err) 239 } 240 241 return contents, nil 242 } 243 244 // singleCPE returns a []cpe.CPE with Source: Generated based on the cpe string or panics if the 245 // cpe string cannot be parsed into valid CPE Attributes 246 func singleCPE(cpeString string) []cpe.CPE { 247 return []cpe.CPE{ 248 cpe.Must(cpeString, cpe.GeneratedSource), 249 } 250 } 251 252 // sharedLibraries returns a list of all shared libraries found within a binary, currently 253 // supporting: elf, macho, and windows pe 254 func sharedLibraries(resolver file.Resolver, location file.Location) ([]string, error) { 255 contents, err := getContents(resolver, location) 256 if err != nil { 257 return nil, err 258 } 259 260 r := bytes.NewReader(contents) 261 262 e, _ := elf.NewFile(r) 263 if e != nil { 264 symbols, err := e.ImportedLibraries() 265 if err != nil { 266 log.Debugf("unable to read elf binary at: %s -- %s", location.RealPath, err) 267 } 268 return symbols, nil 269 } 270 271 m, _ := macho.NewFile(r) 272 if m != nil { 273 symbols, err := m.ImportedLibraries() 274 if err != nil { 275 log.Debugf("unable to read macho binary at: %s -- %s", location.RealPath, err) 276 } 277 return symbols, nil 278 } 279 280 p, _ := pe.NewFile(r) 281 if p != nil { 282 symbols, err := p.ImportedLibraries() 283 if err != nil { 284 log.Debugf("unable to read pe binary at: %s -- %s", location.RealPath, err) 285 } 286 return symbols, nil 287 } 288 289 return nil, nil 290 }