github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/cpe/cpe.go (about)

     1  package cpe
     2  
     3  import (
     4  	"fmt"
     5  	"regexp"
     6  	"strings"
     7  
     8  	"github.com/facebookincubator/nvdtools/wfn"
     9  )
    10  
// CPE pairs a set of CPE attributes with a Source describing where Syft
// obtained them, e.g. generated by heuristics vs looked up in the NVD CPE
// dictionary.
type CPE struct {
	// Attributes holds the individual CPE attribute values.
	Attributes Attributes
	// Source records where the attributes came from.
	Source Source
}
    18  
    19  type Source string
    20  
    21  func (c Source) String() string {
    22  	return string(c)
    23  }
    24  
    25  const (
    26  	GeneratedSource           Source = "syft-generated"
    27  	NVDDictionaryLookupSource Source = "nvd-cpe-dictionary"
    28  	DeclaredSource            Source = "declared"
    29  )
    30  
// Any is the attribute value used when a field is unrestricted. It is the
// empty string, so a zero-value Attributes field is Any; normalizeField maps
// a parsed "*" to this value.
const Any = ""
    32  
// Attributes holds the individual components of a CPE as plain strings.
//
// The field names, types and order must stay identical to wfn.Attributes:
// asAttributes and fromAttributes convert between the two types with a direct
// Go type conversion, which only compiles while the layouts match.
// An empty field is Any.
type Attributes struct {
	Part      string
	Vendor    string
	Product   string
	Version   string
	Update    string
	Edition   string
	SWEdition string
	TargetSW  string
	TargetHW  string
	Other     string
	Language  string
}
    46  
    47  func (c Attributes) asAttributes() wfn.Attributes {
    48  	return wfn.Attributes(c)
    49  }
    50  
    51  func fromAttributes(a wfn.Attributes) Attributes {
    52  	return Attributes(a)
    53  }
    54  
    55  func (c Attributes) BindToFmtString() string {
    56  	return c.asAttributes().BindToFmtString()
    57  }
    58  
    59  func NewWithAny() Attributes {
    60  	return fromAttributes(*(wfn.NewAttributesWithAny()))
    61  }
    62  
    63  const (
    64  	allowedCPEPunctuation = "-!\"#$%&'()+,./:;<=>@[]^`{|}~"
    65  )
    66  
    67  // This regex string is taken from
    68  // https://csrc.nist.gov/schema/cpe/2.3/cpe-naming_2.3.xsd which has the official cpe spec
    69  // This first part matches Attributes urls and the second part matches binding strings
    70  const cpeRegexString = ((`^([c][pP][eE]:/[AHOaho]?(:[A-Za-z0-9\._\-~%]*){0,6})`) +
    71  	// Or match the Attributes binding string
    72  	// Note that we had to replace '`' with '\x60' to escape the backticks
    73  	`|(cpe:2\.3:[aho\*\-](:(((\?*|\*?)([a-zA-Z0-9\-\._]|(\\[\\\*\?!"#$$%&'\(\)\+,/:;<=>@\[\]\^\x60\{\|}~]))+(\?*|\*?))|[\*\-])){5}(:(([a-zA-Z]{2,3}(-([a-zA-Z]{2}|[0-9]{3}))?)|[\*\-]))(:(((\?*|\*?)([a-zA-Z0-9\-\._]|(\\[\\\*\?!"#$$%&'\(\)\+,/:;<=>@\[\]\^\x60\{\|}~]))+(\?*|\*?))|[\*\-])){4})$`)
    74  
    75  var cpeRegex = regexp.MustCompile(cpeRegexString)
    76  
    77  func New(value string, source Source) (CPE, error) {
    78  	attributes, err := NewAttributes(value)
    79  	if err != nil {
    80  		return CPE{}, err
    81  	}
    82  	return CPE{
    83  		Attributes: attributes,
    84  		Source:     source,
    85  	}, nil
    86  }
    87  
    88  // NewAttributes will parse a formatted Attributes string and return a Attributes object. Some input, such as the existence of whitespace
    89  // characters is allowed, however, a more strict validation is done after this sanitization process.
    90  func NewAttributes(cpeStr string) (Attributes, error) {
    91  	// get a Attributes object based on the given string --don't validate yet since it may be possible to escape select cases on the callers behalf
    92  	c, err := newWithoutValidation(cpeStr)
    93  	if err != nil {
    94  		return Attributes{}, fmt.Errorf("unable to parse Attributes string: %w", err)
    95  	}
    96  
    97  	// ensure that this Attributes can be validated after being fully sanitized
    98  	if ValidateString(c.String()) != nil {
    99  		return Attributes{}, err
   100  	}
   101  
   102  	// we don't return the sanitized string, as this is a concern for later when creating Attributes strings. In fact, since
   103  	// sanitization is lossy (whitespace is replaced, not escaped) it's important that the raw values are left as.
   104  	return c, nil
   105  }
   106  
   107  // Must returns a CPE or panics if the provided string is not valid
   108  func Must(cpeStr string, source Source) CPE {
   109  	c := MustAttributes(cpeStr)
   110  	return CPE{
   111  		Attributes: c,
   112  		Source:     source,
   113  	}
   114  }
   115  
   116  func MustAttributes(cpeStr string) Attributes {
   117  	c, err := NewAttributes(cpeStr)
   118  	if err != nil {
   119  		panic(err)
   120  	}
   121  	return c
   122  }
   123  
   124  func ValidateString(cpeStr string) error {
   125  	// We should filter out all CPEs that do not match the official Attributes regex
   126  	// The facebook nvdtools parser can sometimes incorrectly parse invalid Attributes strings
   127  	if !cpeRegex.MatchString(cpeStr) {
   128  		return fmt.Errorf("failed to parse Attributes=%q as it doesn't match the regex=%s", cpeStr, cpeRegexString)
   129  	}
   130  	return nil
   131  }
   132  
   133  func newWithoutValidation(cpeStr string) (Attributes, error) {
   134  	value, err := wfn.Parse(cpeStr)
   135  	if err != nil {
   136  		return Attributes{}, fmt.Errorf("failed to parse Attributes=%q: %w", cpeStr, err)
   137  	}
   138  
   139  	if value == nil {
   140  		return Attributes{}, fmt.Errorf("failed to parse Attributes=%q", cpeStr)
   141  	}
   142  
   143  	syftCPE := fromAttributes(*value)
   144  
   145  	// we need to compare the raw data since we are constructing CPEs in other locations
   146  	syftCPE.Vendor = normalizeField(syftCPE.Vendor)
   147  	syftCPE.Product = normalizeField(syftCPE.Product)
   148  	syftCPE.Language = normalizeField(syftCPE.Language)
   149  	syftCPE.Version = normalizeField(syftCPE.Version)
   150  	syftCPE.TargetSW = normalizeField(syftCPE.TargetSW)
   151  	syftCPE.Part = normalizeField(syftCPE.Part)
   152  	syftCPE.Edition = normalizeField(syftCPE.Edition)
   153  	syftCPE.Other = normalizeField(syftCPE.Other)
   154  	syftCPE.SWEdition = normalizeField(syftCPE.SWEdition)
   155  	syftCPE.TargetHW = normalizeField(syftCPE.TargetHW)
   156  	syftCPE.Update = normalizeField(syftCPE.Update)
   157  
   158  	return syftCPE, nil
   159  }
   160  
   161  func normalizeField(field string) string {
   162  	// replace spaces with underscores (per section 5.3.2 of the Attributes spec v 2.3)
   163  	field = strings.ReplaceAll(field, " ", "_")
   164  
   165  	// keep dashes and forward slashes unescaped
   166  	if field == "*" {
   167  		return Any
   168  	}
   169  	return stripSlashes(field)
   170  }
   171  
   172  // stripSlashes is a reverse of the sanitize function below.
   173  // It correctly removes slashes that are followed by allowed puncts.
   174  // This is to allow for a correct round trip parsing of cpes with quoted characters.
   175  func stripSlashes(s string) string {
   176  	sb := strings.Builder{}
   177  	for i, c := range s {
   178  		if c == '\\' && i+1 < len(s) && strings.ContainsRune(allowedCPEPunctuation, rune(s[i+1])) {
   179  			continue
   180  		}
   181  		sb.WriteRune(c)
   182  	}
   183  	return sb.String()
   184  }
   185  
   186  func (c Attributes) String() string {
   187  	output := Attributes{}
   188  	output.Vendor = sanitize(c.Vendor)
   189  	output.Product = sanitize(c.Product)
   190  	output.Language = sanitize(c.Language)
   191  	output.Version = sanitize(c.Version)
   192  	output.TargetSW = sanitize(c.TargetSW)
   193  	output.Part = sanitize(c.Part)
   194  	output.Edition = sanitize(c.Edition)
   195  	output.Other = sanitize(c.Other)
   196  	output.SWEdition = sanitize(c.SWEdition)
   197  	output.TargetHW = sanitize(c.TargetHW)
   198  	output.Update = sanitize(c.Update)
   199  	return output.BindToFmtString()
   200  }
   201  
   202  // sanitize is a modified version of WFNize function from nvdtools
   203  // that quotes all the allowed punctation chars with a slash and replaces
   204  // spaces with underscores. It differs from the upstream implmentation as
   205  // it does not use the buggy nvdtools implementation, specifically the "addSlashesAt" part of the
   206  // function which stops the loop as soon as it encounters ":" a valid
   207  // character for a WFN attribute after quoting, but the way nvdtools
   208  // handles it causes it to truncate strings that container ":". As a result
   209  // strings like "prefix:1.2" which would have been quoted as "prefix\:1.2"
   210  // end up becoming "prefix" instead causing loss of information and
   211  // incorrect CPEs being generated.
   212  func sanitize(s string) string {
   213  	// replace spaces with underscores
   214  	in := strings.ReplaceAll(s, " ", "_")
   215  
   216  	// escape allowable punctuation per section 5.3.2 in the CPE 2.3 spec
   217  	sb := strings.Builder{}
   218  	for _, c := range in {
   219  		if strings.ContainsRune(allowedCPEPunctuation, c) {
   220  			sb.WriteRune('\\')
   221  		}
   222  		sb.WriteRune(c)
   223  	}
   224  	return sb.String()
   225  }