github.com/nextlinux/gosbom@v0.81.1-0.20230627115839-1ff50c281391/gosbom/cpe/cpe.go (about) 1 package cpe 2 3 import ( 4 "fmt" 5 "regexp" 6 "strings" 7 8 "github.com/facebookincubator/nvdtools/wfn" 9 ) 10 11 type CPE = wfn.Attributes 12 13 const ( 14 allowedCPEPunctuation = "-!\"#$%&'()+,./:;<=>@[]^`{|}~" 15 ) 16 17 // This regex string is taken from 18 // https://csrc.nist.gov/schema/cpe/2.3/cpe-naming_2.3.xsd which has the official cpe spec 19 // This first part matches CPE urls and the second part matches binding strings 20 const cpeRegexString = ((`^([c][pP][eE]:/[AHOaho]?(:[A-Za-z0-9\._\-~%]*){0,6})`) + 21 // Or match the CPE binding string 22 // Note that we had to replace '`' with '\x60' to escape the backticks 23 `|(cpe:2\.3:[aho\*\-](:(((\?*|\*?)([a-zA-Z0-9\-\._]|(\\[\\\*\?!"#$$%&'\(\)\+,/:;<=>@\[\]\^\x60\{\|}~]))+(\?*|\*?))|[\*\-])){5}(:(([a-zA-Z]{2,3}(-([a-zA-Z]{2}|[0-9]{3}))?)|[\*\-]))(:(((\?*|\*?)([a-zA-Z0-9\-\._]|(\\[\\\*\?!"#$$%&'\(\)\+,/:;<=>@\[\]\^\x60\{\|}~]))+(\?*|\*?))|[\*\-])){4})$`) 24 25 var cpeRegex = regexp.MustCompile(cpeRegexString) 26 27 // New will parse a formatted CPE string and return a CPE object. Some input, such as the existence of whitespace 28 // characters is allowed, however, a more strict validation is done after this sanitization process. 29 func New(cpeStr string) (CPE, error) { 30 // get a CPE object based on the given string --don't validate yet since it may be possible to escape select cases on the callers behalf 31 c, err := newWithoutValidation(cpeStr) 32 if err != nil { 33 return CPE{}, fmt.Errorf("unable to parse CPE string: %w", err) 34 } 35 36 // ensure that this CPE can be validated after being fully sanitized 37 if ValidateString(String(c)) != nil { 38 return CPE{}, err 39 } 40 41 // we don't return the sanitized string, as this is a concern for later when creating CPE strings. In fact, since 42 // sanitization is lossy (whitespace is replaced, not escaped) it's important that the raw values are left as. 43 return c, nil 44 } 45 46 // Must returns a CPE or panics if the provided string is not valid 47 func Must(cpeStr string) CPE { 48 c, err := New(cpeStr) 49 if err != nil { 50 panic(err) 51 } 52 return c 53 } 54 55 func ValidateString(cpeStr string) error { 56 // We should filter out all CPEs that do not match the official CPE regex 57 // The facebook nvdtools parser can sometimes incorrectly parse invalid CPE strings 58 if !cpeRegex.MatchString(cpeStr) { 59 return fmt.Errorf("failed to parse CPE=%q as it doesn't match the regex=%s", cpeStr, cpeRegexString) 60 } 61 return nil 62 } 63 64 func newWithoutValidation(cpeStr string) (CPE, error) { 65 value, err := wfn.Parse(cpeStr) 66 if err != nil { 67 return CPE{}, fmt.Errorf("failed to parse CPE=%q: %w", cpeStr, err) 68 } 69 70 if value == nil { 71 return CPE{}, fmt.Errorf("failed to parse CPE=%q", cpeStr) 72 } 73 74 // we need to compare the raw data since we are constructing CPEs in other locations 75 value.Vendor = normalizeField(value.Vendor) 76 value.Product = normalizeField(value.Product) 77 value.Language = normalizeField(value.Language) 78 value.Version = normalizeField(value.Version) 79 value.TargetSW = normalizeField(value.TargetSW) 80 value.Part = normalizeField(value.Part) 81 value.Edition = normalizeField(value.Edition) 82 value.Other = normalizeField(value.Other) 83 value.SWEdition = normalizeField(value.SWEdition) 84 value.TargetHW = normalizeField(value.TargetHW) 85 value.Update = normalizeField(value.Update) 86 87 return *value, nil 88 } 89 90 func normalizeField(field string) string { 91 // replace spaces with underscores (per section 5.3.2 of the CPE spec v 2.3) 92 field = strings.ReplaceAll(field, " ", "_") 93 94 // keep dashes and forward slashes unescaped 95 if field == "*" { 96 return wfn.Any 97 } 98 return stripSlashes(field) 99 } 100 101 // stripSlashes is a reverse of the sanitize function below. 102 // It correctly removes slashes that are followed by allowed puncts. 103 // This is to allow for a correct round trip parsing of cpes with quoted characters. 104 func stripSlashes(s string) string { 105 sb := strings.Builder{} 106 for i, c := range s { 107 if c == '\\' && i+1 < len(s) && strings.ContainsRune(allowedCPEPunctuation, rune(s[i+1])) { 108 continue 109 } 110 sb.WriteRune(c) 111 } 112 return sb.String() 113 } 114 115 func String(c CPE) string { 116 output := CPE{} 117 output.Vendor = sanitize(c.Vendor) 118 output.Product = sanitize(c.Product) 119 output.Language = sanitize(c.Language) 120 output.Version = sanitize(c.Version) 121 output.TargetSW = sanitize(c.TargetSW) 122 output.Part = sanitize(c.Part) 123 output.Edition = sanitize(c.Edition) 124 output.Other = sanitize(c.Other) 125 output.SWEdition = sanitize(c.SWEdition) 126 output.TargetHW = sanitize(c.TargetHW) 127 output.Update = sanitize(c.Update) 128 return output.BindToFmtString() 129 } 130 131 // sanitize is a modified version of WFNize function from nvdtools 132 // that quotes all the allowed punctation chars with a slash and replaces 133 // spaces with underscores. It differs from the upstream implmentation as 134 // it does not use the buggy nvdtools implementation, specifically the "addSlashesAt" part of the 135 // function which stops the loop as soon as it encounters ":" a valid 136 // character for a WFN attribute after quoting, but the way nvdtools 137 // handles it causes it to truncate strings that container ":". As a result 138 // strings like "prefix:1.2" which would have been quoted as "prefix\:1.2" 139 // end up becoming "prefix" instead causing loss of information and 140 // incorrect CPEs being generated. 141 func sanitize(s string) string { 142 // replace spaces with underscores 143 in := strings.ReplaceAll(s, " ", "_") 144 145 // escape allowable punctuation per section 5.3.2 in the CPE 2.3 spec 146 sb := strings.Builder{} 147 for _, c := range in { 148 if strings.ContainsRune(allowedCPEPunctuation, c) { 149 sb.WriteRune('\\') 150 } 151 sb.WriteRune(c) 152 } 153 return sb.String() 154 }