github.com/google/osv-scalibr@v0.4.1/converter/spdx/common_names.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package spdx
    16  
import (
	"regexp"
	"sort"
	"strings"
	"sync"
)
    22  
    23  // Handle mapping common names like LGPL2 to LGPL-2.0-only etc.
    24  
    25  var (
    26  	// conditionally remove hyphen before version number
    27  	minusVersion = regexp.MustCompile("[-]([0-9])")
    28  
    29  	// conditionally remove hyphen after version number
    30  	versionMinus = regexp.MustCompile("([0-9])[-]")
    31  
    32  	// remove ".0" from end of version number
    33  	trailingZero = regexp.MustCompile("[.]0($|[^.0-9])")
    34  
    35  	// turn "-Variant-Name" into initialism "VN"
    36  	trailingInitialism = regexp.MustCompile("[-]([A-Z])[a-z]+($|[^A-Za-z])")
    37  
    38  	commonLicenseNameToShortIdentifier map[string]string
    39  )
    40  
    41  // mapCommonLicenseNames calculates a map from ill-formed common license names to canonical names.
    42  func mapCommonLicenseNames() map[string]string {
    43  	var commonLicenseNameToShortIdentifier = make(map[string]string)
    44  	sortedCanonical := make([]string, 0, len(canonicalLicenses))
    45  	// sort the canonical licenses so the `"name-only"` version overwrites the `"name"` version.
    46  	for canonical := range canonicalLicenses {
    47  		sortedCanonical = append(sortedCanonical, canonical)
    48  	}
    49  	sort.Strings(sortedCanonical)
    50  
    51  	// alreadyPopulated prevents an initialism from clobbering a name
    52  	alreadyPopulated := func(canonical, l string) bool {
    53  		other, ok := commonLicenseNameToShortIdentifier[strings.ToUpper(l)]
    54  		if !ok {
    55  			return false
    56  		}
    57  		// do overwrite "name" with "name-only"
    58  		return canonical != other+"-only"
    59  	}
    60  
    61  	for _, canonical := range sortedCanonical {
    62  		// support case-insensitive match
    63  		commonLicenseNameToShortIdentifier[strings.ToUpper(canonical)] = canonical
    64  
    65  		base := normalize(strings.ReplaceAll(strings.ReplaceAll(canonical, "-only", ""), "-or-later", "+"))
    66  		// base itself is a match for canonical
    67  		commonLicenseNameToShortIdentifier[strings.ToUpper(base)] = canonical
    68  
    69  		for {
    70  			// If the canonical has ver.0.0.0, accept each version with 1 fewer .0's
    71  			l := strings.ToUpper(base)
    72  			for loc := trailingZero.FindAllStringSubmatchIndex(l, -1); loc != nil; loc = trailingZero.FindAllStringSubmatchIndex(l, -1) {
    73  				l = replaceLastGroup(l, loc)
    74  				commonLicenseNameToShortIdentifier[l] = canonical
    75  			}
    76  
    77  			// handle potential initialism like "ASWF-Digital-Assets" as "ASWFDA"
    78  			l = makeInitialism(base)
    79  			if l != base {
    80  				if !alreadyPopulated(canonical, l) {
    81  					commonLicenseNameToShortIdentifier[strings.ToUpper(l)] = canonical
    82  				}
    83  				for loc := trailingZero.FindAllStringSubmatchIndex(l, -1); loc != nil; loc = trailingZero.FindAllStringSubmatchIndex(l, -1) {
    84  					l = replaceLastGroup(l, loc)
    85  					// don't overwrite an actual name with an initialism
    86  					if alreadyPopulated(canonical, l) {
    87  						continue
    88  					}
    89  					commonLicenseNameToShortIdentifier[strings.ToUpper(l)] = canonical
    90  				}
    91  			}
    92  
    93  			// repeat the above for 1.0- without the dash
    94  			l = versionMinus.ReplaceAllString(base, "$1")
    95  			if l == base {
    96  				l = strings.ReplaceAll(base, "-", "")
    97  				if l == base {
    98  					break
    99  				}
   100  			}
   101  			commonLicenseNameToShortIdentifier[strings.ToUpper(l)] = canonical
   102  			base = l
   103  		}
   104  	}
   105  	return commonLicenseNameToShortIdentifier
   106  }
   107  
   108  func replaceLastGroup(l string, locs [][]int) string {
   109  	loc := locs[len(locs)-1]
   110  	return l[:loc[0]] + l[loc[len(loc)-2]:loc[len(loc)-1]] + l[loc[1]:]
   111  }
   112  
   113  func normalize(l string) string {
   114  	// turn something like "Apache-2.0" into "Apache2.0"
   115  	return minusVersion.ReplaceAllString(strings.TrimSpace(l), "$1")
   116  }
   117  
   118  func makeInitialism(l string) string {
   119  	// turn something like "ASWF-Digital-Assets" into "ASWFDA"
   120  	for locs := trailingInitialism.FindAllStringSubmatchIndex(l, -1); locs != nil; locs = trailingInitialism.FindAllStringSubmatchIndex(l, -1) {
   121  		loc := locs[len(locs)-1]
   122  		l = l[:loc[0]] + l[loc[len(loc)-4]:loc[len(loc)-3]] + l[loc[len(loc)-2]:loc[len(loc)-1]] + l[loc[1]:]
   123  	}
   124  	return l
   125  }
   126  
   127  // ShortIdentifier returns the SPDX Short Identifier for the license name and true or an empty string and false.
   128  // see: https://github.com/spdx/license-list-XML/blob/main/DOCS/license-fields.md#b-short-identifier
   129  func ShortIdentifier(l string) (string, bool) {
   130  	if _, ok := canonicalLicenses[l]; ok {
   131  		return l, ok
   132  	}
   133  	l = strings.ToUpper(l)
   134  	if commonLicenseNameToShortIdentifier == nil {
   135  		commonLicenseNameToShortIdentifier = mapCommonLicenseNames()
   136  	}
   137  	if si, ok := commonLicenseNameToShortIdentifier[l]; ok {
   138  		return si, ok
   139  	}
   140  	if si, ok := commonLicenseNameToShortIdentifier[normalize(l)]; ok {
   141  		return si, ok
   142  	}
   143  	return "", false
   144  }