github.com/google/osv-scalibr@v0.4.1/converter/spdx/common_names.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package spdx 16 17 import ( 18 "regexp" 19 "sort" 20 "strings" 21 ) 22 23 // Handle mapping common names like LGPL2 to LGPL-2.0-only etc. 24 25 var ( 26 // conditionally remove hyphen before version number 27 minusVersion = regexp.MustCompile("[-]([0-9])") 28 29 // conditionally remove hyphen after version number 30 versionMinus = regexp.MustCompile("([0-9])[-]") 31 32 // remove ".0" from end of version number 33 trailingZero = regexp.MustCompile("[.]0($|[^.0-9])") 34 35 // turn "-Variant-Name" into initialism "VN" 36 trailingInitialism = regexp.MustCompile("[-]([A-Z])[a-z]+($|[^A-Za-z])") 37 38 commonLicenseNameToShortIdentifier map[string]string 39 ) 40 41 // mapCommonLicenseNames calculates a map from ill-formed common license names to canonical names. 42 func mapCommonLicenseNames() map[string]string { 43 var commonLicenseNameToShortIdentifier = make(map[string]string) 44 sortedCanonical := make([]string, 0, len(canonicalLicenses)) 45 // sort the canonical licenses so the `"name-only"` version overwrites the `"name"` version. 46 for canonical := range canonicalLicenses { 47 sortedCanonical = append(sortedCanonical, canonical) 48 } 49 sort.Strings(sortedCanonical) 50 51 // alreadyPopulated prevents an initialism from clobbering a name 52 alreadyPopulated := func(canonical, l string) bool { 53 other, ok := commonLicenseNameToShortIdentifier[strings.ToUpper(l)] 54 if !ok { 55 return false 56 } 57 // do overwrite "name" with "name-only" 58 return canonical != other+"-only" 59 } 60 61 for _, canonical := range sortedCanonical { 62 // support case-insensitive match 63 commonLicenseNameToShortIdentifier[strings.ToUpper(canonical)] = canonical 64 65 base := normalize(strings.ReplaceAll(strings.ReplaceAll(canonical, "-only", ""), "-or-later", "+")) 66 // base itself is a match for canonical 67 commonLicenseNameToShortIdentifier[strings.ToUpper(base)] = canonical 68 69 for { 70 // If the canonical has ver.0.0.0, accept each version with 1 fewer .0's 71 l := strings.ToUpper(base) 72 for loc := trailingZero.FindAllStringSubmatchIndex(l, -1); loc != nil; loc = trailingZero.FindAllStringSubmatchIndex(l, -1) { 73 l = replaceLastGroup(l, loc) 74 commonLicenseNameToShortIdentifier[l] = canonical 75 } 76 77 // handle potential initialism like "ASWF-Digital-Assets" as "ASWFDA" 78 l = makeInitialism(base) 79 if l != base { 80 if !alreadyPopulated(canonical, l) { 81 commonLicenseNameToShortIdentifier[strings.ToUpper(l)] = canonical 82 } 83 for loc := trailingZero.FindAllStringSubmatchIndex(l, -1); loc != nil; loc = trailingZero.FindAllStringSubmatchIndex(l, -1) { 84 l = replaceLastGroup(l, loc) 85 // don't overwrite an actual name with an initialism 86 if alreadyPopulated(canonical, l) { 87 continue 88 } 89 commonLicenseNameToShortIdentifier[strings.ToUpper(l)] = canonical 90 } 91 } 92 93 // repeat the above for 1.0- without the dash 94 l = versionMinus.ReplaceAllString(base, "$1") 95 if l == base { 96 l = strings.ReplaceAll(base, "-", "") 97 if l == base { 98 break 99 } 100 } 101 commonLicenseNameToShortIdentifier[strings.ToUpper(l)] = canonical 102 base = l 103 } 104 } 105 return commonLicenseNameToShortIdentifier 106 } 107 108 func replaceLastGroup(l string, locs [][]int) string { 109 loc := locs[len(locs)-1] 110 return l[:loc[0]] + l[loc[len(loc)-2]:loc[len(loc)-1]] + l[loc[1]:] 111 } 112 113 func normalize(l string) string { 114 // turn something like "Apache-2.0" into "Apache2.0" 115 return minusVersion.ReplaceAllString(strings.TrimSpace(l), "$1") 116 } 117 118 func makeInitialism(l string) string { 119 // turn something like "ASWF-Digital-Assets" into "ASWFDA" 120 for locs := trailingInitialism.FindAllStringSubmatchIndex(l, -1); locs != nil; locs = trailingInitialism.FindAllStringSubmatchIndex(l, -1) { 121 loc := locs[len(locs)-1] 122 l = l[:loc[0]] + l[loc[len(loc)-4]:loc[len(loc)-3]] + l[loc[len(loc)-2]:loc[len(loc)-1]] + l[loc[1]:] 123 } 124 return l 125 } 126 127 // ShortIdentifier returns the SPDX Short Identifier for the license name and true or an empty string and false. 128 // see: https://github.com/spdx/license-list-XML/blob/main/DOCS/license-fields.md#b-short-identifier 129 func ShortIdentifier(l string) (string, bool) { 130 if _, ok := canonicalLicenses[l]; ok { 131 return l, ok 132 } 133 l = strings.ToUpper(l) 134 if commonLicenseNameToShortIdentifier == nil { 135 commonLicenseNameToShortIdentifier = mapCommonLicenseNames() 136 } 137 if si, ok := commonLicenseNameToShortIdentifier[l]; ok { 138 return si, ok 139 } 140 if si, ok := commonLicenseNameToShortIdentifier[normalize(l)]; ok { 141 return si, ok 142 } 143 return "", false 144 }