github.com/anchore/syft@v1.38.2/internal/spdxlicense/generate/generate_license_list.go (about)

     1  package main
     2  
     3  import (
     4  	"encoding/json"
     5  	"fmt"
     6  	"log"
     7  	"net/http"
     8  	"os"
     9  	"regexp"
    10  	"sort"
    11  	"strings"
    12  	"text/template"
    13  )
    14  
// This program generates license_list.go.
const (
	// source is the name of the generated Go file; run creates (or truncates) it
	// relative to the current working directory.
	source = "license_list.go"
	// url is the location of the SPDX license list JSON document that run downloads;
	// it is also embedded in the header comment of the generated file.
	url    = "https://spdx.org/licenses/licenses.json"
)
    20  
// tmp is the text/template used to render the generated file. It is executed by run
// with a value exposing the fields URL, Version, LicenseIDs and URLToLicense, and
// emits a Version constant plus the licenseIDs and urlToLicense maps with every
// string written through %q.
var tmp = template.Must(template.New("").Parse(`// Code generated by go generate; DO NOT EDIT.
// This file was generated using data from {{ .URL }}
package spdxlicense

const Version = {{ printf "%q" .Version }}

var licenseIDs = map[string]string{
{{- range $k, $v := .LicenseIDs }}
	{{ printf "%q" $k }}: {{ printf "%q" $v }},
{{- end }}
}

// urlToLicense maps license URLs from the seeAlso field to license IDs
var urlToLicense = map[string]string{
{{- range $url, $id := .URLToLicense }}
	{{ printf "%q" $url }}: {{ printf "%q" $id }},
{{- end }}
}
`))
    40  
// versionMatch captures up to three groups of digits: the first group is required,
// the second and third are optional. Each group may be followed by a dot, but since
// every dot is optional the pattern does not require a separator between groups.
// NOTE(review): not referenced in this portion of the file; presumably used by the
// license ID permutation code elsewhere in the package — confirm.
var versionMatch = regexp.MustCompile(`([0-9]+)\.?([0-9]+)?\.?([0-9]+)?\.?`)
    42  
    43  func main() {
    44  	if err := run(); err != nil {
    45  		fmt.Println(err.Error())
    46  		os.Exit(1)
    47  	}
    48  }
    49  
    50  func run() error {
    51  	resp, err := http.Get(url)
    52  	if err != nil {
    53  		return fmt.Errorf("unable to get licenses list: %w", err)
    54  	}
    55  	var result LicenseList
    56  	if err = json.NewDecoder(resp.Body).Decode(&result); err != nil {
    57  		return fmt.Errorf("unable to decode license list: %w", err)
    58  	}
    59  	defer func() {
    60  		if err := resp.Body.Close(); err != nil {
    61  			log.Fatalf("unable to close body: %+v", err)
    62  		}
    63  	}()
    64  
    65  	f, err := os.Create(source)
    66  	if err != nil {
    67  		return fmt.Errorf("unable to create %q: %w", source, err)
    68  	}
    69  	defer func() {
    70  		if err := f.Close(); err != nil {
    71  			log.Fatalf("unable to close %q: %+v", source, err)
    72  		}
    73  	}()
    74  
    75  	licenseIDs := processSPDXLicense(result)
    76  	urlToLicense := buildURLToLicenseMap(result)
    77  
    78  	err = tmp.Execute(f, struct {
    79  		URL          string
    80  		Version      string
    81  		LicenseIDs   map[string]string
    82  		URLToLicense map[string]string
    83  	}{
    84  		URL:          url,
    85  		Version:      result.Version,
    86  		LicenseIDs:   licenseIDs,
    87  		URLToLicense: urlToLicense,
    88  	})
    89  
    90  	if err != nil {
    91  		return fmt.Errorf("unable to generate template: %w", err)
    92  	}
    93  	return nil
    94  }
    95  
    96  // Parsing the provided SPDX license list necessitates a three pass approach.
    97  // The first pass is only related to what SPDX considers the truth. We use license info to
    98  // find replacements for deprecated licenses.
    99  // The second pass attempts to generate known short/long version listings for each key.
   100  // For info on some short name conventions see this document:
   101  // https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/#license-short-name.
   102  // The short long listing generation attempts to build all license permutations for a given key.
   103  // The new keys are then also associated with their relative SPDX value. If a key has already been entered
   104  // we know to ignore it since it came from the first pass which is considered the SPDX source of truth.
   105  // We also sort the licenses for the second pass so that cases like `GPL-1` associate to `GPL-1.0` and not `GPL-1.1`.
   106  // The third pass is for overwriting deprecated licenses with replacements, for example GPL-2.0+ is deprecated
   107  // and now maps to GPL-2.0-or-later.
   108  func processSPDXLicense(result LicenseList) map[string]string {
   109  	// The order of variations/permutations of a license ID matter.
   110  	// The permutation code can generate the same value for two difference licenses,
   111  	// for example: The licenses `ABC-1.0` and `ABC-1.1` can both map to `ABC1`,
   112  	// we need to guarantee the order they are created to avoid mapping them incorrectly.
   113  	// To do this we use a sorted list.
   114  	sort.Slice(result.Licenses, func(i, j int) bool {
   115  		return result.Licenses[i].ID < result.Licenses[j].ID
   116  	})
   117  
   118  	// keys are simplified by removing dashes and lowercasing ID
   119  	// this is so license declarations in the wild like: LGPL3 LGPL-3 lgpl3 and lgpl-3 can all match
   120  	licenseIDs := make(map[string]string)
   121  	for _, l := range result.Licenses {
   122  		// licensePerms includes the cleanID in return slice
   123  		cleanID := cleanLicenseID(l.ID)
   124  		licensePerms := buildLicenseIDPermutations(cleanID)
   125  
   126  		// if license is deprecated, find its replacement and add to licenseIDs
   127  		if l.Deprecated {
   128  			idToMap := l.ID
   129  			replacement := result.findReplacementLicense(l)
   130  			if replacement != nil {
   131  				idToMap = replacement.ID
   132  			}
   133  			// it's important to use the original licensePerms here so that the deprecated license
   134  			// can now point to the new correct license
   135  			for _, id := range licensePerms {
   136  				if _, exists := licenseIDs[id]; exists {
   137  					// can be used to debug duplicate license permutations and confirm that examples like GPL1
   138  					// do not point to GPL-1.1
   139  					// log.Println("duplicate license list permutation found when mapping deprecated license to replacement")
   140  					// log.Printf("already have key: %q for SPDX ID: %q; attempted to map replacement ID: %q for deprecated ID: %q\n", id, value, replacement.ID, l.ID)
   141  					continue
   142  				}
   143  				licenseIDs[id] = idToMap
   144  			}
   145  		}
   146  
   147  		// if license is not deprecated, add all permutations to licenseIDs
   148  		for _, id := range licensePerms {
   149  			if _, exists := licenseIDs[id]; exists {
   150  				// log.Println("found duplicate license permutation key for non deprecated license")
   151  				// log.Printf("already have key: %q for SPDX ID: %q; tried to insert as SPDX ID:%q\n", id, value, l.ID)
   152  				continue
   153  			}
   154  			licenseIDs[id] = l.ID
   155  		}
   156  	}
   157  
   158  	return licenseIDs
   159  }
   160  
   161  func cleanLicenseID(id string) string {
   162  	cleanID := strings.ToLower(id)
   163  	return strings.ReplaceAll(cleanID, "-", "")
   164  }
   165  
   166  // buildURLToLicenseMap creates a mapping from license URLs (from seeAlso fields) to license IDs
   167  func buildURLToLicenseMap(result LicenseList) map[string]string {
   168  	urlMap := make(map[string]string)
   169  
   170  	for _, l := range result.Licenses {
   171  		// Skip deprecated licenses
   172  		if l.Deprecated {
   173  			// Find replacement license if available
   174  			replacement := result.findReplacementLicense(l)
   175  			if replacement != nil {
   176  				// Map deprecated license URLs to the replacement license
   177  				for _, url := range l.SeeAlso {
   178  					urlMap[url] = replacement.ID
   179  				}
   180  			}
   181  			continue
   182  		}
   183  
   184  		// Add URLs from non-deprecated licenses
   185  		for _, url := range l.SeeAlso {
   186  			urlMap[url] = l.ID
   187  		}
   188  	}
   189  
   190  	return urlMap
   191  }