github.com/anchore/syft@v1.38.2/syft/pkg/license.go (about) 1 package pkg 2 3 import ( 4 "context" 5 "crypto/sha256" 6 "encoding/hex" 7 "fmt" 8 "io" 9 "net/url" 10 "sort" 11 "strings" 12 13 "github.com/scylladb/go-set/strset" 14 15 "github.com/anchore/syft/internal/licenses" 16 "github.com/anchore/syft/internal/log" 17 "github.com/anchore/syft/internal/spdxlicense" 18 "github.com/anchore/syft/syft/artifact" 19 "github.com/anchore/syft/syft/file" 20 "github.com/anchore/syft/syft/license" 21 ) 22 23 var _ sort.Interface = (*Licenses)(nil) 24 25 // License represents an SPDX Expression or license value extracted from a package's metadata. 26 // A License is a unique combination of value, expression and type, where its sources are always 27 // considered merged and additions to the evidence of where it was found and how it was sourced. 28 // This is different from how we treat a package since we consider package paths in order to 29 // distinguish if packages should be kept separate. This is different for licenses since we're 30 // only looking for evidence of where a license was declared/concluded for a given package. 31 type License struct { 32 // SPDXExpression is parsed SPDX license expression (e.g. "MIT OR Apache-2.0") 33 SPDXExpression string 34 35 // Value is original raw license string as found in metadata (e.g. "mit or apache-2") 36 Value string 37 38 // Type is classification of how this license was discovered (declared, concluded, etc.). 39 // A Concluded License type is the license the SBOM creator believes governs the package (human crafted or altered SBOM). 40 // The Declared License is what the authors of a project believe govern the package (this is the default type syft uses). 41 Type license.Type 42 43 // Contents is full license text if available. If a license is given as its full text in the 44 // metadata rather than its value or SPDX expression, this field is used to represent that data. 45 Contents string `hash:"ignore"` 46 47 // URLs are the list of URLs where license information was found. These are ignored for uniqueness 48 // since we merge these fields across equal licenses. 49 URLs []string `hash:"ignore"` 50 51 // Locations are the file locations where this license was discovered. These are ignored for uniqueness 52 // since we merge these fields across equal licenses. 53 Locations file.LocationSet `hash:"ignore"` 54 } 55 56 // Licenses is a sortable collection of License objects implementing sort.Interface. 57 type Licenses []License 58 59 func (l Licenses) Len() int { 60 return len(l) 61 } 62 63 func (l Licenses) Less(i, j int) bool { 64 if l[i].Value == l[j].Value { 65 if l[i].SPDXExpression == l[j].SPDXExpression { 66 if l[i].Type == l[j].Type { 67 if l[i].Contents == l[j].Contents { 68 // While URLs and location are not exclusive fields 69 // returning true here reduces the number of swaps 70 // while keeping a consistent sort order of 71 // the order that they appear in the list initially 72 // If users in the future have a preference to sorting based 73 // on the slice representation of either field, we can update this code 74 return true 75 } 76 return l[i].Contents < l[j].Contents 77 } 78 return l[i].Type < l[j].Type 79 } 80 return l[i].SPDXExpression < l[j].SPDXExpression 81 } 82 return l[i].Value < l[j].Value 83 } 84 85 func (l Licenses) Swap(i, j int) { 86 l[i], l[j] = l[j], l[i] 87 } 88 89 func NewLicensesFromReadCloserWithContext(ctx context.Context, closer file.LocationReadCloser) []License { 90 //Definition: The license that the auditor or scanning tool concludes applies, based on the actual contents of the files. 91 //Source: Derived from analyzing the source code, license headers, and full license texts in the files. 92 // Given we are scanning the contents of the file, we should use the Concluded License type. 93 return newLicenseBuilder().WithContents(closer).WithLocations(closer.Location).WithType(license.Concluded).Build(ctx).ToSlice() 94 } 95 96 func NewLicenseWithContext(ctx context.Context, value string) License { 97 return NewLicenseFromTypeWithContext(ctx, value, license.Declared) 98 } 99 100 func NewLicenseFromTypeWithContext(ctx context.Context, value string, t license.Type) License { 101 lics := newLicenseBuilder().WithValues(value).WithType(t).Build(ctx).ToSlice() 102 if len(lics) > 0 { 103 return lics[0] 104 } 105 // TODO: this is not ideal, but also not expected given the input of "value" 106 return License{} 107 } 108 109 func NewLicensesFromValuesWithContext(ctx context.Context, values ...string) []License { 110 return newLicenseBuilder().WithValues(values...).Build(ctx).ToSlice() 111 } 112 113 func NewLicensesFromLocationWithContext(ctx context.Context, location file.Location, values ...string) []License { 114 return newLicenseBuilder().WithValues(values...).WithLocations(location).Build(ctx).ToSlice() 115 } 116 117 func NewLicenseFromLocationsWithContext(ctx context.Context, value string, locations ...file.Location) License { 118 lics := newLicenseBuilder().WithValues(value).WithLocations(locations...).Build(ctx).ToSlice() 119 if len(lics) > 0 { 120 return lics[0] 121 } 122 // TODO: this is not ideal, but also not expected given the input of "value" 123 return License{} 124 } 125 126 func NewLicenseFromURLsWithContext(ctx context.Context, value string, urls ...string) License { 127 lics := newLicenseBuilder().WithValues(value).WithURLs(urls...).Build(ctx).ToSlice() 128 if len(lics) > 0 { 129 return lics[0] 130 } 131 // TODO: this is not ideal, but also not expected given the input of "value" 132 return License{} 133 } 134 135 func stripUnwantedCharacters(rawURL string) (string, error) { 136 cleanedURL := strings.TrimSpace(rawURL) 137 _, err := url.ParseRequestURI(cleanedURL) 138 if err != nil { 139 return "", fmt.Errorf("invalid URL: %w", err) 140 } 141 142 return cleanedURL, nil 143 } 144 145 func NewLicenseFromFieldsWithContext(ctx context.Context, value, url string, location *file.Location) License { 146 // If value is empty but URL is provided, try to enrich from SPDX database 147 if value == "" && url != "" { 148 if info, found := spdxlicense.LicenseByURL(url); found { 149 value = info.ID 150 } 151 } 152 153 lics := newLicenseBuilder().WithValues(value).WithURLs(url).WithOptionalLocation(location).Build(ctx).ToSlice() 154 if len(lics) > 0 { 155 return lics[0] 156 } 157 // TODO: this is not ideal, but also not expected given the input of "value" 158 return License{} 159 } 160 161 func (s License) Empty() bool { 162 return s.Value == "" && s.SPDXExpression == "" && s.Contents == "" && len(s.URLs) == 0 163 } 164 165 // Merge two licenses into a new license object. If the merge is not possible due to unmergeable fields 166 // (e.g. different values for Value, SPDXExpression, Type, or any non-collection type) an error is returned. 167 // TODO: this is a bit of a hack to not infinitely recurse when hashing a license 168 func (s License) Merge(l License) (*License, error) { 169 sHash, err := artifact.IDByHash(s) 170 if err != nil { 171 return nil, err 172 } 173 lHash, err := artifact.IDByHash(l) 174 if err != nil { 175 return nil, err 176 } 177 if sHash != lHash { 178 return nil, fmt.Errorf("cannot merge licenses with different hash") 179 } 180 181 // try to keep s.URLs unallocated unless necessary (which is the default state from the constructor) 182 if len(l.URLs) > 0 { 183 s.URLs = append(s.URLs, l.URLs...) 184 } 185 186 if len(s.URLs) > 0 { 187 s.URLs = strset.New(s.URLs...).List() 188 sort.Strings(s.URLs) 189 } 190 191 if l.Locations.Empty() { 192 return &s, nil 193 } 194 195 // since the set instance has a reference type (map), we must make a new instance 196 locations := file.NewLocationSet(s.Locations.ToSlice()...) 197 locations.Add(l.Locations.ToSlice()...) 198 s.Locations = locations 199 200 return &s, nil 201 } 202 203 // licenseBuilder is an internal builder for constructing License objects with validation and normalization. 204 type licenseBuilder struct { 205 // values are raw license strings or SPDX expressions to process. 206 values []string 207 // contents are readers for full license text content. 208 contents []io.ReadCloser 209 // locations are file locations where license information was discovered. 210 locations []file.Location 211 // urls are web URLs where license information can be found. 212 urls []string 213 // tp is the license type classification (declared, concluded, etc.). 214 tp license.Type 215 } 216 217 func newLicenseBuilder() *licenseBuilder { 218 return &licenseBuilder{ 219 tp: license.Declared, 220 } 221 } 222 223 func (b *licenseBuilder) WithValues(expr ...string) *licenseBuilder { 224 for _, v := range expr { 225 if v == "" { 226 continue 227 } 228 b.values = append(b.values, v) 229 } 230 return b 231 } 232 233 func (b *licenseBuilder) WithOptionalLocation(location *file.Location) *licenseBuilder { 234 if location != nil { 235 b.locations = append(b.locations, *location) 236 } 237 return b 238 } 239 240 func (b *licenseBuilder) WithURLs(urls ...string) *licenseBuilder { 241 s := strset.New() 242 for _, u := range urls { 243 if u != "" { 244 sanitizedURL, err := stripUnwantedCharacters(u) 245 if err != nil { 246 log.Tracef("unable to sanitize url=%q: %s", u, err) 247 continue 248 } 249 s.Add(sanitizedURL) 250 } 251 } 252 253 b.urls = append(b.urls, s.List()...) 254 sort.Strings(b.urls) 255 return b 256 } 257 258 func (b *licenseBuilder) WithLocations(locations ...file.Location) *licenseBuilder { 259 for _, loc := range locations { 260 if loc.Path() != "" { 261 b.locations = append(b.locations, loc) 262 } 263 } 264 return b 265 } 266 267 func (b *licenseBuilder) WithContents(contents ...io.ReadCloser) *licenseBuilder { 268 for _, content := range contents { 269 if content != nil { 270 b.contents = append(b.contents, content) 271 } 272 } 273 return b 274 } 275 276 func (b *licenseBuilder) WithType(t license.Type) *licenseBuilder { 277 b.tp = t // last one wins, multiple is not valid 278 return b 279 } 280 281 func (b *licenseBuilder) Build(ctx context.Context) LicenseSet { 282 // for every value make a license with all locations 283 // or for every reader make a license with all locations 284 // if given a reader and a value, this is invalid 285 286 locations := file.NewLocationSet(b.locations...) 287 288 set := NewLicenseSet() 289 for _, v := range b.values { 290 if strings.Contains(v, "\n") { 291 var loc file.Location 292 if len(b.locations) > 0 { 293 loc = b.locations[0] 294 } 295 b.contents = append(b.contents, file.NewLocationReadCloser(loc, io.NopCloser(strings.NewReader(v)))) 296 continue 297 } 298 299 // we want to check if the SPDX field should be set 300 var expression string 301 if ex, err := license.ParseExpression(v); err == nil { 302 expression = ex 303 } 304 305 set.Add(License{ 306 SPDXExpression: expression, 307 Value: strings.TrimSpace(v), 308 Type: b.tp, 309 URLs: b.urls, 310 Locations: locations, 311 }) 312 } 313 314 // we have some readers (with no values); let's try to turn into licenses if we can 315 for _, content := range b.contents { 316 set.Add(b.buildFromContents(ctx, content)...) 317 } 318 319 if set.Empty() && len(b.urls) > 0 { 320 // if we have no values or contents, but we do have URLs, let's make a license with the URLs 321 // try to enrich the license by looking up the URL in the SPDX database 322 license := License{ 323 Type: b.tp, 324 URLs: b.urls, 325 Locations: locations, 326 } 327 328 // attempt to fill in missing license information from the first URL 329 if len(b.urls) > 0 { 330 if info, found := spdxlicense.LicenseByURL(b.urls[0]); found { 331 license.Value = info.ID 332 license.SPDXExpression = info.ID 333 } 334 } 335 336 set.Add(license) 337 } 338 339 return set 340 } 341 342 func (b *licenseBuilder) buildFromContents(ctx context.Context, contents io.ReadCloser) []License { 343 if !licenses.IsContextLicenseScannerSet(ctx) { 344 // we do not have a scanner; we don't want to create one; we sha256 the content and populate the value 345 internal, err := contentFromReader(contents) 346 if err != nil { 347 log.WithFields("error", err).Trace("could not read content") 348 return nil 349 } 350 return []License{b.licenseFromContentHash(internal)} 351 } 352 353 scanner, err := licenses.ContextLicenseScanner(ctx) 354 if err != nil { 355 log.WithFields("error", err).Trace("could not find license scanner") 356 internal, err := contentFromReader(contents) 357 if err != nil { 358 log.WithFields("error", err).Trace("could not read content") 359 return nil 360 } 361 return []License{b.licenseFromContentHash(internal)} 362 } 363 364 evidence, content, err := scanner.FindEvidence(ctx, contents) 365 if err != nil { 366 log.WithFields("error", err).Trace("scanner failed to scan contents") 367 return nil 368 } 369 370 if len(evidence) > 0 { 371 // we have some ID and offsets to apply to our content; let's make some detailed licenses 372 return b.licensesFromEvidenceAndContent(evidence, content) 373 } 374 // scanner couldn't find anything, but we still have the file contents; sha256 and send it back with value 375 return []License{b.licenseFromContentHash(string(content))} 376 } 377 378 func (b *licenseBuilder) licensesFromEvidenceAndContent(evidence []licenses.Evidence, content []byte) []License { 379 ls := make([]License, 0) 380 for _, e := range evidence { 381 // basic license 382 candidate := License{ 383 Value: e.ID, 384 Locations: file.NewLocationSet(b.locations...), 385 Type: b.tp, 386 } 387 // get content offset 388 if e.Start >= 0 && e.End <= len(content) && e.Start <= e.End { 389 candidate.Contents = string(content[e.Start:e.End]) 390 } 391 // check for SPDX Validity 392 if ex, err := license.ParseExpression(e.ID); err == nil { 393 candidate.SPDXExpression = ex 394 } 395 396 ls = append(ls, candidate) 397 } 398 return ls 399 } 400 401 func (b *licenseBuilder) licenseFromContentHash(content string) License { 402 hash := sha256HexFromString(content) 403 value := "sha256:" + hash 404 405 return License{ 406 Value: value, 407 Contents: content, 408 Type: b.tp, 409 Locations: file.NewLocationSet(b.locations...), 410 } 411 } 412 413 func contentFromReader(r io.Reader) (string, error) { 414 bytes, err := io.ReadAll(r) 415 if err != nil { 416 return "", err 417 } 418 return strings.TrimSpace(string(bytes)), nil 419 } 420 421 func sha256HexFromString(s string) string { 422 hash := sha256.Sum256([]byte(s)) 423 return hex.EncodeToString(hash[:]) 424 }