github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/gentoo/license.go (about)

     1  package gentoo
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"fmt"
     7  	"io"
     8  	"slices"
     9  	"strings"
    10  
    11  	"github.com/scylladb/go-set/strset"
    12  
    13  	"github.com/anchore/syft/internal"
    14  	"github.com/anchore/syft/internal/log"
    15  	"github.com/anchore/syft/syft/file"
    16  )
    17  
// the license files seem to conform to a custom format that is common to gentoo packages.
// see more details:
//  - https://www.gentoo.org/glep/glep-0023.html#id9
//  - https://devmanual.gentoo.org/general-concepts/licenses/index.html
//
// in short, the format is:
//
//   mandatory-license
//      || ( choosable-license-1 choosable-license-2 )
//      useflag? ( optional-component-license )
//
//   "License names may contain [a-zA-Z0-9] (english alphanumeric characters), _ (underscore), - (hyphen), .
//   (dot) and + (plus sign). They must not begin with a hyphen, a dot or a plus sign."
//
// this does not conform to SPDX license expressions; producing them would be a great enhancement in the future.
    33  
    34  // extractLicenses attempts to parse the license field into a valid SPDX license expression
    35  func extractLicenses(resolver file.Resolver, closestLocation *file.Location, reader io.Reader) (string, string) {
    36  	findings := strset.New()
    37  	contentsWriter := bytes.Buffer{}
    38  	scanner := bufio.NewScanner(io.TeeReader(reader, &contentsWriter))
    39  	scanner.Split(bufio.ScanWords)
    40  	var (
    41  		mandatoryLicenses, conditionalLicenses, useflagLicenses []string
    42  		usesGroups                                              bool
    43  		pipe                                                    bool
    44  		useflag                                                 bool
    45  	)
    46  
    47  	for scanner.Scan() {
    48  		token := scanner.Text()
    49  		if token == "||" {
    50  			pipe = true
    51  			continue
    52  		}
    53  		// useflag
    54  		if strings.Contains(token, "?") {
    55  			useflag = true
    56  			continue
    57  		}
    58  		if !strings.ContainsAny(token, "()|?") {
    59  			switch {
    60  			case useflag:
    61  				useflagLicenses = append(useflagLicenses, token)
    62  			case pipe:
    63  				conditionalLicenses = append(conditionalLicenses, token)
    64  			default:
    65  				mandatoryLicenses = append(mandatoryLicenses, token)
    66  			}
    67  			if strings.HasPrefix(token, "@") {
    68  				usesGroups = true
    69  			}
    70  		}
    71  	}
    72  
    73  	var licenseGroups map[string][]string
    74  	if usesGroups {
    75  		licenseGroups = readLicenseGroups(resolver, closestLocation)
    76  	}
    77  	mandatoryLicenses = replaceLicenseGroups(mandatoryLicenses, licenseGroups)
    78  	conditionalLicenses = replaceLicenseGroups(conditionalLicenses, licenseGroups)
    79  	findings.Add(mandatoryLicenses...)
    80  	findings.Add(conditionalLicenses...)
    81  	findings.Add(useflagLicenses...)
    82  
    83  	var mandatoryStatement, conditionalStatement string
    84  
    85  	// attempt to build valid SPDX license expression
    86  	if len(mandatoryLicenses) > 0 {
    87  		mandatoryStatement = strings.Join(mandatoryLicenses, " AND ")
    88  	}
    89  	if len(conditionalLicenses) > 0 {
    90  		conditionalStatement = strings.Join(conditionalLicenses, " OR ")
    91  	}
    92  
    93  	contents := strings.TrimSpace(contentsWriter.String())
    94  
    95  	if mandatoryStatement != "" && conditionalStatement != "" {
    96  		return contents, mandatoryStatement + " AND (" + conditionalStatement + ")"
    97  	}
    98  
    99  	if mandatoryStatement != "" {
   100  		return contents, mandatoryStatement
   101  	}
   102  
   103  	if conditionalStatement != "" {
   104  		return contents, conditionalStatement
   105  	}
   106  
   107  	return contents, ""
   108  }
   109  
   110  func readLicenseGroups(resolver file.Resolver, closestLocation *file.Location) map[string][]string {
   111  	if resolver == nil || closestLocation == nil {
   112  		return nil
   113  	}
   114  	var licenseGroups map[string][]string
   115  	groupLocation := resolver.RelativeFileByPath(*closestLocation, "/etc/portage/license_groups")
   116  	if groupLocation == nil {
   117  		return nil
   118  	}
   119  
   120  	groupReader, err := resolver.FileContentsByLocation(*groupLocation)
   121  	defer internal.CloseAndLogError(groupReader, groupLocation.RealPath)
   122  	if err != nil {
   123  		log.WithFields("path", groupLocation.RealPath, "error", err).Debug("failed to fetch portage LICENSE")
   124  		return nil
   125  	}
   126  
   127  	if groupReader == nil {
   128  		return nil
   129  	}
   130  
   131  	licenseGroups, err = parseLicenseGroups(groupReader)
   132  	if err != nil {
   133  		log.WithFields("path", groupLocation.RealPath, "error", err).Debug("failed to parse portage LICENSE")
   134  	}
   135  
   136  	return licenseGroups
   137  }
   138  
   139  func replaceLicenseGroups(licenses []string, groups map[string][]string) []string {
   140  	if groups == nil {
   141  		return licenses
   142  	}
   143  
   144  	result := make([]string, 0, len(licenses))
   145  	for _, license := range licenses {
   146  		if strings.HasPrefix(license, "@") {
   147  			// this is a license group...
   148  			name := strings.TrimPrefix(license, "@")
   149  			if expandedLicenses, ok := groups[name]; ok {
   150  				result = append(result, expandedLicenses...)
   151  			} else {
   152  				// unable to expand, use the original license group value (including the '@')
   153  				result = append(result, license)
   154  			}
   155  		} else {
   156  			// this is a license...
   157  			result = append(result, license)
   158  		}
   159  	}
   160  	return result
   161  }
   162  
   163  func parseLicenseGroups(reader io.Reader) (map[string][]string, error) {
   164  	result := make(map[string][]string)
   165  	rawGroups := make(map[string][]string)
   166  
   167  	scanner := bufio.NewScanner(reader)
   168  
   169  	// first collect all raw groups
   170  	for scanner.Scan() {
   171  		line := strings.TrimSpace(scanner.Text())
   172  
   173  		if line == "" || strings.HasPrefix(line, "#") {
   174  			// skip empty lines and comments
   175  			continue
   176  		}
   177  
   178  		parts := strings.Fields(line)
   179  		if len(parts) < 2 {
   180  			return nil, fmt.Errorf("invalid line format: %s", line)
   181  		}
   182  
   183  		groupName := parts[0]
   184  		licenses := parts[1:]
   185  
   186  		rawGroups[groupName] = licenses
   187  	}
   188  
   189  	if err := scanner.Err(); err != nil {
   190  		return nil, err
   191  	}
   192  
   193  	// next process each group to expand nested references
   194  	for groupName, licenses := range rawGroups {
   195  		expanded, err := expandLicenses(groupName, licenses, rawGroups, make(map[string]bool))
   196  		if err != nil {
   197  			return nil, err
   198  		}
   199  		result[groupName] = expanded
   200  	}
   201  
   202  	return result, nil
   203  }
   204  
   205  // expandLicenses handles the recursive expansion of license groups, 'visited' is used to detect cycles. We are always
   206  // in terms of slices instead of sets to ensure original ordering is preserved.
   207  func expandLicenses(currentGroup string, licenses []string, rawGroups map[string][]string, visited map[string]bool) ([]string, error) {
   208  	if visited[currentGroup] {
   209  		return nil, fmt.Errorf("cycle detected in license group definitions for group: %s", currentGroup)
   210  	}
   211  
   212  	visited[currentGroup] = true
   213  
   214  	result := make([]string, 0)
   215  
   216  	for _, item := range licenses {
   217  		if strings.HasPrefix(item, "@") {
   218  			// this is a reference to another group
   219  			refGroupName := item[1:] // remove '@' prefix
   220  
   221  			refLicenses, exists := rawGroups[refGroupName]
   222  			if !exists {
   223  				return nil, fmt.Errorf("referenced group not found: %s", refGroupName)
   224  			}
   225  
   226  			newVisited := make(map[string]bool)
   227  			for k, v := range visited {
   228  				newVisited[k] = v
   229  			}
   230  
   231  			expanded, err := expandLicenses(refGroupName, refLicenses, rawGroups, newVisited)
   232  			if err != nil {
   233  				return nil, err
   234  			}
   235  
   236  			for _, license := range expanded {
   237  				if !slices.Contains(result, license) {
   238  					result = append(result, license)
   239  				}
   240  			}
   241  		} else if !slices.Contains(result, item) {
   242  			// ...this is a regular license
   243  			result = append(result, item)
   244  		}
   245  	}
   246  
   247  	return result, nil
   248  }