github.com/google/osv-scalibr@v0.4.1/converter/spdx/license.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package spdx
    16  
    17  import (
    18  	"fmt"
    19  	"strings"
    20  
    21  	"bitbucket.org/creachadair/stringset"
    22  	"github.com/spdx/tools-golang/spdx/v2/v2_3"
    23  	"github.com/thoas/go-funk"
    24  )
    25  
    26  const (
    27  	// See https://docs.deps.dev/faq/#how-are-licenses-determined for more license info.
    28  
    29  	// NonStandardLicense refers to a non-spdx-compliant license.
    30  	NonStandardLicense = "non-standard"
    31  
    32  	// UnknownLicense refers to a license we can't identify.
    33  	UnknownLicense = "unknown"
    34  
    35  	// LicenseRefPrefix is the prefix for non-standard licenses.
    36  	LicenseRefPrefix = "LicenseRef-"
    37  )
    38  
    39  // LicenseExpression takes an array of licenses and transforms it into an SPDX-compliant license
    40  // expression. These licenses can have come from anywhere, so we don't assume anything about their
    41  // values.
    42  // We parse licenses that are singular expressions (e.g. "MIT") and those that are basic expressions
    43  // (e.g. "MIT AND LGPL").
    44  func LicenseExpression(licenses []string) (string, stringset.Set) {
    45  	cleanLicenses := cleanLicenseExpression(licenses)
    46  	if len(cleanLicenses) == 0 {
    47  		return NoAssertion, stringset.Set{}
    48  	}
    49  	licenseExpressionSet := stringset.New()
    50  	customLicenses := stringset.New()
    51  	for _, l := range cleanLicenses {
    52  		// If there is a nonstandard placeholder value, then we just mark the whole block as
    53  		// NOASSERTION, as we can't construct a license expression with it.
    54  		if strings.EqualFold(l, UnknownLicense) || strings.EqualFold(l, NonStandardLicense) {
    55  			return NoAssertion, stringset.Set{}
    56  		}
    57  		// If we have an OR, then we need to both validate every license inside the expression, and
    58  		// wrap in parentheses, so that it's clear that it's distinct from any ANDs.
    59  		l := strings.ReplaceAll(l, " or ", " OR ")
    60  		if strings.Contains(l, " OR ") {
    61  			orLicenses := []string{}
    62  			orLicenseSplit := strings.SplitSeq(l, " OR ")
    63  			for ols := range orLicenseSplit {
    64  				spdxL, customL := spdxAndCustomLicenses(ols)
    65  				orLicenses = append(orLicenses, spdxL)
    66  				if customL != "" {
    67  					customLicenses.Add(customL)
    68  				}
    69  			}
    70  			// Combine them back
    71  			licenseExpressionSet.Add(fmt.Sprintf("(%s)", strings.Join(orLicenses, " OR ")))
    72  		} else {
    73  			spdxL, customL := spdxAndCustomLicenses(l)
    74  			licenseExpressionSet.Add(spdxL)
    75  			if customL != "" {
    76  				customLicenses.Add(customL)
    77  			}
    78  		}
    79  	}
    80  	return strings.Join(licenseExpressionSet.Elements(), " AND "), customLicenses
    81  }
    82  
    83  // cleanLicenseExpression preparses the licenses to allow extraction, by
    84  // 1. Removing empty licenses
    85  // 2. Stripping off leading/trailing parentheses
    86  // 3. Treating AND licenses as separate licenses
    87  func cleanLicenseExpression(licenses []string) []string {
    88  	cleanLicenses := []string{}
    89  	for _, l := range licenses {
    90  		if l == "" {
    91  			continue
    92  		}
    93  		var noParenLicense string
    94  		if strings.HasPrefix(l, "(") && strings.HasSuffix(l, ")") {
    95  			noParenLicense = l[1 : len(l)-1]
    96  		} else {
    97  			noParenLicense = l
    98  		}
    99  		l = strings.ReplaceAll(noParenLicense, " and ", " AND ")
   100  		cleanLicenses = append(cleanLicenses, strings.Split(l, " AND ")...)
   101  	}
   102  	return cleanLicenses
   103  }
   104  
   105  // spdxAndCustomLicenses takes a single license, and returns just it (if it is a valid spdx license)
   106  // or the cleaned version of it for the reference, and the actual text
   107  func spdxAndCustomLicenses(l string) (string, string) {
   108  	if shortID, ok := ShortIdentifier(l); ok {
   109  		return shortID, ""
   110  	}
   111  	return spdxLicenceRef(l), l
   112  }
   113  
   114  // ToOtherLicenses converts a stringset to an SPDX OtherLicense field.
   115  func ToOtherLicenses(otherLicenses stringset.Set) []*v2_3.OtherLicense {
   116  	if otherLicenses.Empty() {
   117  		return nil
   118  	}
   119  	return funk.Map(otherLicenses.Elements(), func(l string) *v2_3.OtherLicense {
   120  		return &v2_3.OtherLicense{LicenseIdentifier: spdxLicenceRef(l), ExtractedText: l}
   121  	}).([]*v2_3.OtherLicense)
   122  }
   123  
   124  func spdxLicenceRef(l string) string {
   125  	return LicenseRefPrefix + replaceSPDXIDInvalidChars(l)
   126  }