github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/cpe/cpe.go (about) 1 package cpe 2 3 import ( 4 "fmt" 5 "regexp" 6 "strings" 7 8 "github.com/facebookincubator/nvdtools/wfn" 9 ) 10 11 // CPE contains the attributes of an NVD Attributes and a string 12 // describing where Syft got the Attributes, e.g. generated by heuristics 13 // vs looked up in the NVD Attributes dictionary 14 type CPE struct { 15 Attributes Attributes 16 Source Source 17 } 18 19 type Source string 20 21 func (c Source) String() string { 22 return string(c) 23 } 24 25 const ( 26 GeneratedSource Source = "syft-generated" 27 NVDDictionaryLookupSource Source = "nvd-cpe-dictionary" 28 DeclaredSource Source = "declared" 29 ) 30 31 const Any = "" 32 33 type Attributes struct { 34 Part string 35 Vendor string 36 Product string 37 Version string 38 Update string 39 Edition string 40 SWEdition string 41 TargetSW string 42 TargetHW string 43 Other string 44 Language string 45 } 46 47 func (c Attributes) asAttributes() wfn.Attributes { 48 return wfn.Attributes(c) 49 } 50 51 func fromAttributes(a wfn.Attributes) Attributes { 52 return Attributes(a) 53 } 54 55 func (c Attributes) BindToFmtString() string { 56 return c.asAttributes().BindToFmtString() 57 } 58 59 func NewWithAny() Attributes { 60 return fromAttributes(*(wfn.NewAttributesWithAny())) 61 } 62 63 const ( 64 allowedCPEPunctuation = "-!\"#$%&'()+,./:;<=>@[]^`{|}~" 65 ) 66 67 // This regex string is taken from 68 // https://csrc.nist.gov/schema/cpe/2.3/cpe-naming_2.3.xsd which has the official cpe spec 69 // This first part matches Attributes urls and the second part matches binding strings 70 const cpeRegexString = ((`^([c][pP][eE]:/[AHOaho]?(:[A-Za-z0-9\._\-~%]*){0,6})`) + 71 // Or match the Attributes binding string 72 // Note that we had to replace '`' with '\x60' to escape the backticks 73 `|(cpe:2\.3:[aho\*\-](:(((\?*|\*?)([a-zA-Z0-9\-\._]|(\\[\\\*\?!"#$$%&'\(\)\+,/:;<=>@\[\]\^\x60\{\|}~]))+(\?*|\*?))|[\*\-])){5}(:(([a-zA-Z]{2,3}(-([a-zA-Z]{2}|[0-9]{3}))?)|[\*\-]))(:(((\?*|\*?)([a-zA-Z0-9\-\._]|(\\[\\\*\?!"#$$%&'\(\)\+,/:;<=>@\[\]\^\x60\{\|}~]))+(\?*|\*?))|[\*\-])){4})$`) 74 75 var cpeRegex = regexp.MustCompile(cpeRegexString) 76 77 func New(value string, source Source) (CPE, error) { 78 attributes, err := NewAttributes(value) 79 if err != nil { 80 return CPE{}, err 81 } 82 return CPE{ 83 Attributes: attributes, 84 Source: source, 85 }, nil 86 } 87 88 // NewAttributes will parse a formatted Attributes string and return a Attributes object. Some input, such as the existence of whitespace 89 // characters is allowed, however, a more strict validation is done after this sanitization process. 90 func NewAttributes(cpeStr string) (Attributes, error) { 91 // get a Attributes object based on the given string --don't validate yet since it may be possible to escape select cases on the callers behalf 92 c, err := newWithoutValidation(cpeStr) 93 if err != nil { 94 return Attributes{}, fmt.Errorf("unable to parse Attributes string: %w", err) 95 } 96 97 // ensure that this Attributes can be validated after being fully sanitized 98 if ValidateString(c.String()) != nil { 99 return Attributes{}, err 100 } 101 102 // we don't return the sanitized string, as this is a concern for later when creating Attributes strings. In fact, since 103 // sanitization is lossy (whitespace is replaced, not escaped) it's important that the raw values are left as. 104 return c, nil 105 } 106 107 // Must returns a CPE or panics if the provided string is not valid 108 func Must(cpeStr string, source Source) CPE { 109 c := MustAttributes(cpeStr) 110 return CPE{ 111 Attributes: c, 112 Source: source, 113 } 114 } 115 116 func MustAttributes(cpeStr string) Attributes { 117 c, err := NewAttributes(cpeStr) 118 if err != nil { 119 panic(err) 120 } 121 return c 122 } 123 124 func ValidateString(cpeStr string) error { 125 // We should filter out all CPEs that do not match the official Attributes regex 126 // The facebook nvdtools parser can sometimes incorrectly parse invalid Attributes strings 127 if !cpeRegex.MatchString(cpeStr) { 128 return fmt.Errorf("failed to parse Attributes=%q as it doesn't match the regex=%s", cpeStr, cpeRegexString) 129 } 130 return nil 131 } 132 133 func newWithoutValidation(cpeStr string) (Attributes, error) { 134 value, err := wfn.Parse(cpeStr) 135 if err != nil { 136 return Attributes{}, fmt.Errorf("failed to parse Attributes=%q: %w", cpeStr, err) 137 } 138 139 if value == nil { 140 return Attributes{}, fmt.Errorf("failed to parse Attributes=%q", cpeStr) 141 } 142 143 syftCPE := fromAttributes(*value) 144 145 // we need to compare the raw data since we are constructing CPEs in other locations 146 syftCPE.Vendor = normalizeField(syftCPE.Vendor) 147 syftCPE.Product = normalizeField(syftCPE.Product) 148 syftCPE.Language = normalizeField(syftCPE.Language) 149 syftCPE.Version = normalizeField(syftCPE.Version) 150 syftCPE.TargetSW = normalizeField(syftCPE.TargetSW) 151 syftCPE.Part = normalizeField(syftCPE.Part) 152 syftCPE.Edition = normalizeField(syftCPE.Edition) 153 syftCPE.Other = normalizeField(syftCPE.Other) 154 syftCPE.SWEdition = normalizeField(syftCPE.SWEdition) 155 syftCPE.TargetHW = normalizeField(syftCPE.TargetHW) 156 syftCPE.Update = normalizeField(syftCPE.Update) 157 158 return syftCPE, nil 159 } 160 161 func normalizeField(field string) string { 162 // replace spaces with underscores (per section 5.3.2 of the Attributes spec v 2.3) 163 field = strings.ReplaceAll(field, " ", "_") 164 165 // keep dashes and forward slashes unescaped 166 if field == "*" { 167 return Any 168 } 169 return stripSlashes(field) 170 } 171 172 // stripSlashes is a reverse of the sanitize function below. 173 // It correctly removes slashes that are followed by allowed puncts. 174 // This is to allow for a correct round trip parsing of cpes with quoted characters. 175 func stripSlashes(s string) string { 176 sb := strings.Builder{} 177 for i, c := range s { 178 if c == '\\' && i+1 < len(s) && strings.ContainsRune(allowedCPEPunctuation, rune(s[i+1])) { 179 continue 180 } 181 sb.WriteRune(c) 182 } 183 return sb.String() 184 } 185 186 func (c Attributes) String() string { 187 output := Attributes{} 188 output.Vendor = sanitize(c.Vendor) 189 output.Product = sanitize(c.Product) 190 output.Language = sanitize(c.Language) 191 output.Version = sanitize(c.Version) 192 output.TargetSW = sanitize(c.TargetSW) 193 output.Part = sanitize(c.Part) 194 output.Edition = sanitize(c.Edition) 195 output.Other = sanitize(c.Other) 196 output.SWEdition = sanitize(c.SWEdition) 197 output.TargetHW = sanitize(c.TargetHW) 198 output.Update = sanitize(c.Update) 199 return output.BindToFmtString() 200 } 201 202 // sanitize is a modified version of WFNize function from nvdtools 203 // that quotes all the allowed punctation chars with a slash and replaces 204 // spaces with underscores. It differs from the upstream implmentation as 205 // it does not use the buggy nvdtools implementation, specifically the "addSlashesAt" part of the 206 // function which stops the loop as soon as it encounters ":" a valid 207 // character for a WFN attribute after quoting, but the way nvdtools 208 // handles it causes it to truncate strings that container ":". As a result 209 // strings like "prefix:1.2" which would have been quoted as "prefix\:1.2" 210 // end up becoming "prefix" instead causing loss of information and 211 // incorrect CPEs being generated. 212 func sanitize(s string) string { 213 // replace spaces with underscores 214 in := strings.ReplaceAll(s, " ", "_") 215 216 // escape allowable punctuation per section 5.3.2 in the CPE 2.3 spec 217 sb := strings.Builder{} 218 for _, c := range in { 219 if strings.ContainsRune(allowedCPEPunctuation, c) { 220 sb.WriteRune('\\') 221 } 222 sb.WriteRune(c) 223 } 224 return sb.String() 225 }