github.com/nextlinux/gosbom@v0.81.1-0.20230627115839-1ff50c281391/gosbom/pkg/cataloger/binary/classifier.go (about) 1 package binary 2 3 import ( 4 "bytes" 5 "debug/elf" 6 "debug/macho" 7 "debug/pe" 8 "fmt" 9 "io" 10 "regexp" 11 "strings" 12 "text/template" 13 14 "github.com/nextlinux/gosbom/gosbom/cpe" 15 "github.com/nextlinux/gosbom/gosbom/file" 16 "github.com/nextlinux/gosbom/gosbom/pkg" 17 "github.com/nextlinux/gosbom/gosbom/pkg/cataloger/internal/unionreader" 18 "github.com/nextlinux/gosbom/internal" 19 "github.com/nextlinux/gosbom/internal/log" 20 21 "github.com/anchore/packageurl-go" 22 ) 23 24 var emptyPURL = packageurl.PackageURL{} 25 26 // classifier is a generic package classifier that can be used to match a package definition 27 // to a file that meets the given content criteria of the evidenceMatcher. 28 type classifier struct { 29 Class string 30 31 // FileGlob is a selector to narrow down file inspection using the **/glob* syntax 32 FileGlob string 33 34 // EvidenceMatcher is what will be used to match against the file in the source 35 // location. If the matcher returns a package, the file will be considered a candidate. 36 EvidenceMatcher evidenceMatcher 37 38 // Information below is used to specify the Package information when returned 39 40 // Package is the name to use for the package 41 Package string 42 43 // Language is the language to classify this package as 44 Language pkg.Language 45 46 // Type is the package type to use for the package 47 Type pkg.Type 48 49 // PURL is the Package URL to use when generating a package 50 PURL packageurl.PackageURL 51 52 // CPEs are the specific CPEs we want to include for this binary with updated version information 53 CPEs []cpe.CPE 54 } 55 56 // evidenceMatcher is a function called to catalog Packages that match some sort of evidence 57 type evidenceMatcher func(resolver file.Resolver, classifier classifier, location file.Location) ([]pkg.Package, error) 58 59 func evidenceMatchers(matchers ...evidenceMatcher) evidenceMatcher { 60 return func(resolver file.Resolver, classifier classifier, location file.Location) ([]pkg.Package, error) { 61 for _, matcher := range matchers { 62 match, err := matcher(resolver, classifier, location) 63 if err != nil { 64 return nil, err 65 } 66 if match != nil { 67 return match, nil 68 } 69 } 70 return nil, nil 71 } 72 } 73 74 func fileNameTemplateVersionMatcher(fileNamePattern string, contentTemplate string) evidenceMatcher { 75 pat := regexp.MustCompile(fileNamePattern) 76 return func(resolver file.Resolver, classifier classifier, location file.Location) ([]pkg.Package, error) { 77 if !pat.MatchString(location.RealPath) { 78 return nil, nil 79 } 80 81 filepathNamedGroupValues := internal.MatchNamedCaptureGroups(pat, location.RealPath) 82 83 // versions like 3.5 should not match any character, but explicit dot 84 for k, v := range filepathNamedGroupValues { 85 filepathNamedGroupValues[k] = strings.ReplaceAll(v, ".", "\\.") 86 } 87 88 tmpl, err := template.New("").Parse(contentTemplate) 89 if err != nil { 90 return nil, fmt.Errorf("unable to parse classifier template=%q : %w", contentTemplate, err) 91 } 92 93 patternBuf := &bytes.Buffer{} 94 err = tmpl.Execute(patternBuf, filepathNamedGroupValues) 95 if err != nil { 96 return nil, fmt.Errorf("unable to render template: %w", err) 97 } 98 99 tmplPattern, err := regexp.Compile(patternBuf.String()) 100 if err != nil { 101 return nil, fmt.Errorf("unable to compile rendered regex=%q: %w", patternBuf.String(), err) 102 } 103 104 contents, err := getContents(resolver, location) 105 if err != nil { 106 return nil, fmt.Errorf("unable to get read contents for file: %w", err) 107 } 108 109 matchMetadata := internal.MatchNamedCaptureGroups(tmplPattern, string(contents)) 110 111 p := newPackage(classifier, location, matchMetadata) 112 if p == nil { 113 return nil, nil 114 } 115 116 return []pkg.Package{*p}, nil 117 } 118 } 119 120 func fileContentsVersionMatcher(pattern string) evidenceMatcher { 121 pat := regexp.MustCompile(pattern) 122 return func(resolver file.Resolver, classifier classifier, location file.Location) ([]pkg.Package, error) { 123 contents, err := getContents(resolver, location) 124 if err != nil { 125 return nil, fmt.Errorf("unable to get read contents for file: %w", err) 126 } 127 128 matchMetadata := internal.MatchNamedCaptureGroups(pat, string(contents)) 129 130 p := newPackage(classifier, location, matchMetadata) 131 if p == nil { 132 return nil, nil 133 } 134 135 return []pkg.Package{*p}, nil 136 } 137 } 138 139 //nolint:gocognit 140 func sharedLibraryLookup(sharedLibraryPattern string, sharedLibraryMatcher evidenceMatcher) evidenceMatcher { 141 pat := regexp.MustCompile(sharedLibraryPattern) 142 return func(resolver file.Resolver, classifier classifier, location file.Location) (packages []pkg.Package, _ error) { 143 libs, err := sharedLibraries(resolver, location) 144 if err != nil { 145 return nil, err 146 } 147 for _, lib := range libs { 148 if !pat.MatchString(lib) { 149 continue 150 } 151 152 locations, err := resolver.FilesByGlob("**/" + lib) 153 if err != nil { 154 return nil, err 155 } 156 for _, libraryLocation := range locations { 157 pkgs, err := sharedLibraryMatcher(resolver, classifier, libraryLocation) 158 if err != nil { 159 return nil, err 160 } 161 for _, p := range pkgs { 162 // set the source binary as the first location 163 locationSet := file.NewLocationSet(location) 164 locationSet.Add(p.Locations.ToSlice()...) 165 p.Locations = locationSet 166 meta, _ := p.Metadata.(pkg.BinaryMetadata) 167 p.Metadata = pkg.BinaryMetadata{ 168 Matches: append([]pkg.ClassifierMatch{ 169 { 170 Classifier: classifier.Class, 171 Location: location, 172 }, 173 }, meta.Matches...), 174 } 175 packages = append(packages, p) 176 } 177 } 178 } 179 return packages, nil 180 } 181 } 182 183 func mustPURL(purl string) packageurl.PackageURL { 184 p, err := packageurl.FromString(purl) 185 if err != nil { 186 panic(fmt.Sprintf("invalid PURL: %s", p)) 187 } 188 return p 189 } 190 191 func getContents(resolver file.Resolver, location file.Location) ([]byte, error) { 192 reader, err := resolver.FileContentsByLocation(location) 193 if err != nil { 194 return nil, err 195 } 196 197 unionReader, err := unionreader.GetUnionReader(reader) 198 if err != nil { 199 return nil, fmt.Errorf("unable to get union reader for file: %w", err) 200 } 201 202 // TODO: there may be room for improvement here, as this may use an excessive amount of memory. Alternate approach is to leverage a RuneReader. 203 contents, err := io.ReadAll(unionReader) 204 if err != nil { 205 return nil, fmt.Errorf("unable to get contents for file: %w", err) 206 } 207 208 return contents, nil 209 } 210 211 // singleCPE returns a []pkg.CPE based on the cpe string or panics if the CPE is invalid 212 func singleCPE(cpeString string) []cpe.CPE { 213 return []cpe.CPE{ 214 cpe.Must(cpeString), 215 } 216 } 217 218 // sharedLibraries returns a list of all shared libraries found within a binary, currently 219 // supporting: elf, macho, and windows pe 220 func sharedLibraries(resolver file.Resolver, location file.Location) ([]string, error) { 221 contents, err := getContents(resolver, location) 222 if err != nil { 223 return nil, err 224 } 225 226 r := bytes.NewReader(contents) 227 228 e, _ := elf.NewFile(r) 229 if e != nil { 230 symbols, err := e.ImportedLibraries() 231 if err != nil { 232 log.Debugf("unable to read elf binary at: %s -- %s", location.RealPath, err) 233 } 234 return symbols, nil 235 } 236 237 m, _ := macho.NewFile(r) 238 if m != nil { 239 symbols, err := m.ImportedLibraries() 240 if err != nil { 241 log.Debugf("unable to read macho binary at: %s -- %s", location.RealPath, err) 242 } 243 return symbols, nil 244 } 245 246 p, _ := pe.NewFile(r) 247 if p != nil { 248 symbols, err := p.ImportedLibraries() 249 if err != nil { 250 log.Debugf("unable to read pe binary at: %s -- %s", location.RealPath, err) 251 } 252 return symbols, nil 253 } 254 255 return nil, nil 256 }