github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/syft/pkg/cataloger/binary/classifier.go (about) 1 package binary 2 3 import ( 4 "bytes" 5 "debug/elf" 6 "debug/macho" 7 "debug/pe" 8 "fmt" 9 "io" 10 "regexp" 11 "strings" 12 "text/template" 13 14 "github.com/anchore/packageurl-go" 15 "github.com/anchore/syft/internal" 16 "github.com/anchore/syft/internal/log" 17 "github.com/anchore/syft/syft/cpe" 18 "github.com/anchore/syft/syft/file" 19 "github.com/anchore/syft/syft/pkg" 20 "github.com/anchore/syft/syft/pkg/cataloger/internal/unionreader" 21 ) 22 23 var emptyPURL = packageurl.PackageURL{} 24 25 // classifier is a generic package classifier that can be used to match a package definition 26 // to a file that meets the given content criteria of the evidenceMatcher. 27 type classifier struct { 28 Class string 29 30 // FileGlob is a selector to narrow down file inspection using the **/glob* syntax 31 FileGlob string 32 33 // EvidenceMatcher is what will be used to match against the file in the source 34 // location. If the matcher returns a package, the file will be considered a candidate. 35 EvidenceMatcher evidenceMatcher 36 37 // Information below is used to specify the Package information when returned 38 39 // Package is the name to use for the package 40 Package string 41 42 // Language is the language to classify this package as 43 Language pkg.Language 44 45 // Type is the package type to use for the package 46 Type pkg.Type 47 48 // PURL is the Package URL to use when generating a package 49 PURL packageurl.PackageURL 50 51 // CPEs are the specific CPEs we want to include for this binary with updated version information 52 CPEs []cpe.CPE 53 } 54 55 // evidenceMatcher is a function called to catalog Packages that match some sort of evidence 56 type evidenceMatcher func(resolver file.Resolver, classifier classifier, location file.Location) ([]pkg.Package, error) 57 58 func evidenceMatchers(matchers ...evidenceMatcher) evidenceMatcher { 59 return func(resolver file.Resolver, classifier classifier, location file.Location) ([]pkg.Package, error) { 60 for _, matcher := range matchers { 61 match, err := matcher(resolver, classifier, location) 62 if err != nil { 63 return nil, err 64 } 65 if match != nil { 66 return match, nil 67 } 68 } 69 return nil, nil 70 } 71 } 72 73 func fileNameTemplateVersionMatcher(fileNamePattern string, contentTemplate string) evidenceMatcher { 74 pat := regexp.MustCompile(fileNamePattern) 75 return func(resolver file.Resolver, classifier classifier, location file.Location) ([]pkg.Package, error) { 76 if !pat.MatchString(location.RealPath) { 77 return nil, nil 78 } 79 80 filepathNamedGroupValues := internal.MatchNamedCaptureGroups(pat, location.RealPath) 81 82 // versions like 3.5 should not match any character, but explicit dot 83 for k, v := range filepathNamedGroupValues { 84 filepathNamedGroupValues[k] = strings.ReplaceAll(v, ".", "\\.") 85 } 86 87 tmpl, err := template.New("").Parse(contentTemplate) 88 if err != nil { 89 return nil, fmt.Errorf("unable to parse classifier template=%q : %w", contentTemplate, err) 90 } 91 92 patternBuf := &bytes.Buffer{} 93 err = tmpl.Execute(patternBuf, filepathNamedGroupValues) 94 if err != nil { 95 return nil, fmt.Errorf("unable to render template: %w", err) 96 } 97 98 tmplPattern, err := regexp.Compile(patternBuf.String()) 99 if err != nil { 100 return nil, fmt.Errorf("unable to compile rendered regex=%q: %w", patternBuf.String(), err) 101 } 102 103 contents, err := getContents(resolver, location) 104 if err != nil { 105 return nil, fmt.Errorf("unable to get read contents for file: %w", err) 106 } 107 108 matchMetadata := internal.MatchNamedCaptureGroups(tmplPattern, string(contents)) 109 110 p := newPackage(classifier, location, matchMetadata) 111 if p == nil { 112 return nil, nil 113 } 114 115 return []pkg.Package{*p}, nil 116 } 117 } 118 119 func fileContentsVersionMatcher(pattern string) evidenceMatcher { 120 pat := regexp.MustCompile(pattern) 121 return func(resolver file.Resolver, classifier classifier, location file.Location) ([]pkg.Package, error) { 122 contents, err := getContents(resolver, location) 123 if err != nil { 124 return nil, fmt.Errorf("unable to get read contents for file: %w", err) 125 } 126 127 matchMetadata := internal.MatchNamedCaptureGroups(pat, string(contents)) 128 129 p := newPackage(classifier, location, matchMetadata) 130 if p == nil { 131 return nil, nil 132 } 133 134 return []pkg.Package{*p}, nil 135 } 136 } 137 138 //nolint:gocognit 139 func sharedLibraryLookup(sharedLibraryPattern string, sharedLibraryMatcher evidenceMatcher) evidenceMatcher { 140 pat := regexp.MustCompile(sharedLibraryPattern) 141 return func(resolver file.Resolver, classifier classifier, location file.Location) (packages []pkg.Package, _ error) { 142 libs, err := sharedLibraries(resolver, location) 143 if err != nil { 144 return nil, err 145 } 146 for _, lib := range libs { 147 if !pat.MatchString(lib) { 148 continue 149 } 150 151 locations, err := resolver.FilesByGlob("**/" + lib) 152 if err != nil { 153 return nil, err 154 } 155 for _, libraryLocation := range locations { 156 pkgs, err := sharedLibraryMatcher(resolver, classifier, libraryLocation) 157 if err != nil { 158 return nil, err 159 } 160 for _, p := range pkgs { 161 // set the source binary as the first location 162 locationSet := file.NewLocationSet(location) 163 locationSet.Add(p.Locations.ToSlice()...) 164 p.Locations = locationSet 165 meta, _ := p.Metadata.(pkg.BinaryMetadata) 166 p.Metadata = pkg.BinaryMetadata{ 167 Matches: append([]pkg.ClassifierMatch{ 168 { 169 Classifier: classifier.Class, 170 Location: location, 171 }, 172 }, meta.Matches...), 173 } 174 packages = append(packages, p) 175 } 176 } 177 } 178 return packages, nil 179 } 180 } 181 182 func mustPURL(purl string) packageurl.PackageURL { 183 p, err := packageurl.FromString(purl) 184 if err != nil { 185 panic(fmt.Sprintf("invalid PURL: %s", p)) 186 } 187 return p 188 } 189 190 func getContents(resolver file.Resolver, location file.Location) ([]byte, error) { 191 reader, err := resolver.FileContentsByLocation(location) 192 if err != nil { 193 return nil, err 194 } 195 196 unionReader, err := unionreader.GetUnionReader(reader) 197 if err != nil { 198 return nil, fmt.Errorf("unable to get union reader for file: %w", err) 199 } 200 201 // TODO: there may be room for improvement here, as this may use an excessive amount of memory. Alternate approach is to leverage a RuneReader. 202 contents, err := io.ReadAll(unionReader) 203 if err != nil { 204 return nil, fmt.Errorf("unable to get contents for file: %w", err) 205 } 206 207 return contents, nil 208 } 209 210 // singleCPE returns a []pkg.CPE based on the cpe string or panics if the CPE is invalid 211 func singleCPE(cpeString string) []cpe.CPE { 212 return []cpe.CPE{ 213 cpe.Must(cpeString), 214 } 215 } 216 217 // sharedLibraries returns a list of all shared libraries found within a binary, currently 218 // supporting: elf, macho, and windows pe 219 func sharedLibraries(resolver file.Resolver, location file.Location) ([]string, error) { 220 contents, err := getContents(resolver, location) 221 if err != nil { 222 return nil, err 223 } 224 225 r := bytes.NewReader(contents) 226 227 e, _ := elf.NewFile(r) 228 if e != nil { 229 symbols, err := e.ImportedLibraries() 230 if err != nil { 231 log.Debugf("unable to read elf binary at: %s -- %s", location.RealPath, err) 232 } 233 return symbols, nil 234 } 235 236 m, _ := macho.NewFile(r) 237 if m != nil { 238 symbols, err := m.ImportedLibraries() 239 if err != nil { 240 log.Debugf("unable to read macho binary at: %s -- %s", location.RealPath, err) 241 } 242 return symbols, nil 243 } 244 245 p, _ := pe.NewFile(r) 246 if p != nil { 247 symbols, err := p.ImportedLibraries() 248 if err != nil { 249 log.Debugf("unable to read pe binary at: %s -- %s", location.RealPath, err) 250 } 251 return symbols, nil 252 } 253 254 return nil, nil 255 }