github.com/lineaje-labs/syft@v0.98.1-0.20231227153149-9e393f60ff1b/syft/pkg/cataloger/binary/classifier.go (about) 1 package binary 2 3 import ( 4 "bytes" 5 "debug/elf" 6 "debug/macho" 7 "debug/pe" 8 "fmt" 9 "io" 10 "regexp" 11 "strings" 12 "text/template" 13 14 "github.com/anchore/packageurl-go" 15 "github.com/anchore/syft/syft/cpe" 16 "github.com/anchore/syft/syft/file" 17 "github.com/anchore/syft/syft/pkg" 18 "github.com/lineaje-labs/syft/internal" 19 "github.com/lineaje-labs/syft/internal/log" 20 "github.com/lineaje-labs/syft/syft/pkg/cataloger/internal/unionreader" 21 ) 22 23 var emptyPURL = packageurl.PackageURL{} 24 25 // classifier is a generic package classifier that can be used to match a package definition 26 // to a file that meets the given content criteria of the evidenceMatcher. 27 type classifier struct { 28 Class string 29 30 // FileGlob is a selector to narrow down file inspection using the **/glob* syntax 31 FileGlob string 32 33 // EvidenceMatcher is what will be used to match against the file in the source 34 // location. If the matcher returns a package, the file will be considered a candidate. 35 EvidenceMatcher evidenceMatcher 36 37 // Information below is used to specify the Package information when returned 38 39 // Package is the name to use for the package 40 Package string 41 42 // Language is the language to classify this package as 43 Language pkg.Language 44 45 // Type is the package type to use for the package 46 Type pkg.Type 47 48 // PURL is the Package URL to use when generating a package 49 PURL packageurl.PackageURL 50 51 // CPEs are the specific CPEs we want to include for this binary with updated version information 52 CPEs []cpe.CPE 53 } 54 55 // evidenceMatcher is a function called to catalog Packages that match some sort of evidence 56 type evidenceMatcher func(resolver file.Resolver, classifier classifier, location file.Location) ([]pkg.Package, error) 57 58 func evidenceMatchers(matchers ...evidenceMatcher) evidenceMatcher { 59 return func(resolver file.Resolver, classifier classifier, location file.Location) ([]pkg.Package, error) { 60 for _, matcher := range matchers { 61 match, err := matcher(resolver, classifier, location) 62 if err != nil { 63 return nil, err 64 } 65 if match != nil { 66 return match, nil 67 } 68 } 69 return nil, nil 70 } 71 } 72 73 func fileNameTemplateVersionMatcher(fileNamePattern string, contentTemplate string) evidenceMatcher { 74 pat := regexp.MustCompile(fileNamePattern) 75 return func(resolver file.Resolver, classifier classifier, location file.Location) ([]pkg.Package, error) { 76 if !pat.MatchString(location.RealPath) { 77 return nil, nil 78 } 79 80 filepathNamedGroupValues := internal.MatchNamedCaptureGroups(pat, location.RealPath) 81 82 // versions like 3.5 should not match any character, but explicit dot 83 for k, v := range filepathNamedGroupValues { 84 filepathNamedGroupValues[k] = strings.ReplaceAll(v, ".", "\\.") 85 } 86 87 tmpl, err := template.New("").Parse(contentTemplate) 88 if err != nil { 89 return nil, fmt.Errorf("unable to parse classifier template=%q : %w", contentTemplate, err) 90 } 91 92 patternBuf := &bytes.Buffer{} 93 err = tmpl.Execute(patternBuf, filepathNamedGroupValues) 94 if err != nil { 95 return nil, fmt.Errorf("unable to render template: %w", err) 96 } 97 98 tmplPattern, err := regexp.Compile(patternBuf.String()) 99 if err != nil { 100 return nil, fmt.Errorf("unable to compile rendered regex=%q: %w", patternBuf.String(), err) 101 } 102 103 contents, err := getContents(resolver, location) 104 if err != nil { 105 return nil, fmt.Errorf("unable to get read contents for file: %w", err) 106 } 107 108 matchMetadata := internal.MatchNamedCaptureGroups(tmplPattern, string(contents)) 109 110 p := newPackage(classifier, location, matchMetadata) 111 if p == nil { 112 return nil, nil 113 } 114 115 return []pkg.Package{*p}, nil 116 } 117 } 118 119 func fileContentsVersionMatcher(pattern string) evidenceMatcher { 120 pat := regexp.MustCompile(pattern) 121 return func(resolver file.Resolver, classifier classifier, location file.Location) ([]pkg.Package, error) { 122 contents, err := getContents(resolver, location) 123 if err != nil { 124 return nil, fmt.Errorf("unable to get read contents for file: %w", err) 125 } 126 127 matchMetadata := internal.MatchNamedCaptureGroups(pat, string(contents)) 128 129 p := newPackage(classifier, location, matchMetadata) 130 if p == nil { 131 return nil, nil 132 } 133 134 return []pkg.Package{*p}, nil 135 } 136 } 137 138 //nolint:gocognit 139 func sharedLibraryLookup(sharedLibraryPattern string, sharedLibraryMatcher evidenceMatcher) evidenceMatcher { 140 pat := regexp.MustCompile(sharedLibraryPattern) 141 return func( 142 resolver file.Resolver, classifier classifier, location file.Location, 143 ) (packages []pkg.Package, _ error) { 144 libs, err := sharedLibraries(resolver, location) 145 if err != nil { 146 return nil, err 147 } 148 for _, lib := range libs { 149 if !pat.MatchString(lib) { 150 continue 151 } 152 153 locations, err := resolver.FilesByGlob("**/" + lib) 154 if err != nil { 155 return nil, err 156 } 157 for _, libraryLocation := range locations { 158 pkgs, err := sharedLibraryMatcher(resolver, classifier, libraryLocation) 159 if err != nil { 160 return nil, err 161 } 162 for _, p := range pkgs { 163 // set the source binary as the first location 164 locationSet := file.NewLocationSet(location) 165 locationSet.Add(p.Locations.ToSlice()...) 166 p.Locations = locationSet 167 meta, _ := p.Metadata.(pkg.BinarySignature) 168 p.Metadata = pkg.BinarySignature{ 169 Matches: append([]pkg.ClassifierMatch{ 170 { 171 Classifier: classifier.Class, 172 Location: location, 173 }, 174 }, meta.Matches...), 175 } 176 packages = append(packages, p) 177 } 178 } 179 } 180 return packages, nil 181 } 182 } 183 184 func mustPURL(purl string) packageurl.PackageURL { 185 p, err := packageurl.FromString(purl) 186 if err != nil { 187 panic(fmt.Sprintf("invalid PURL: %s", p)) 188 } 189 return p 190 } 191 192 func getContents(resolver file.Resolver, location file.Location) ([]byte, error) { 193 reader, err := resolver.FileContentsByLocation(location) 194 if err != nil { 195 return nil, err 196 } 197 198 unionReader, err := unionreader.GetUnionReader(reader) 199 if err != nil { 200 return nil, fmt.Errorf("unable to get union reader for file: %w", err) 201 } 202 203 // TODO: there may be room for improvement here, as this may use an excessive amount of memory. Alternate approach is to leverage a RuneReader. 204 contents, err := io.ReadAll(unionReader) 205 if err != nil { 206 return nil, fmt.Errorf("unable to get contents for file: %w", err) 207 } 208 209 return contents, nil 210 } 211 212 // singleCPE returns a []pkg.CPE based on the cpe string or panics if the CPE is invalid 213 func singleCPE(cpeString string) []cpe.CPE { 214 return []cpe.CPE{ 215 cpe.Must(cpeString), 216 } 217 } 218 219 // sharedLibraries returns a list of all shared libraries found within a binary, currently 220 // supporting: elf, macho, and windows pe 221 func sharedLibraries(resolver file.Resolver, location file.Location) ([]string, error) { 222 contents, err := getContents(resolver, location) 223 if err != nil { 224 return nil, err 225 } 226 227 r := bytes.NewReader(contents) 228 229 e, _ := elf.NewFile(r) 230 if e != nil { 231 symbols, err := e.ImportedLibraries() 232 if err != nil { 233 log.Debugf("unable to read elf binary at: %s -- %s", location.RealPath, err) 234 } 235 return symbols, nil 236 } 237 238 m, _ := macho.NewFile(r) 239 if m != nil { 240 symbols, err := m.ImportedLibraries() 241 if err != nil { 242 log.Debugf("unable to read macho binary at: %s -- %s", location.RealPath, err) 243 } 244 return symbols, nil 245 } 246 247 p, _ := pe.NewFile(r) 248 if p != nil { 249 symbols, err := p.ImportedLibraries() 250 if err != nil { 251 log.Debugf("unable to read pe binary at: %s -- %s", location.RealPath, err) 252 } 253 return symbols, nil 254 } 255 256 return nil, nil 257 }