github.com/nextlinux/gosbom@v0.81.1-0.20230627115839-1ff50c281391/gosbom/cpe/cpe.go (about)

     1  package cpe
     2  
     3  import (
     4  	"fmt"
     5  	"regexp"
     6  	"strings"
     7  
     8  	"github.com/facebookincubator/nvdtools/wfn"
     9  )
    10  
// CPE is an alias for wfn.Attributes, the nvdtools type whose fields (Part, Vendor, Product,
// Version, ...) hold the individual attributes of a CPE.
type CPE = wfn.Attributes
    12  
const (
	// allowedCPEPunctuation is the set of punctuation characters that sanitize prefixes with a
	// backslash when rendering a CPE, and whose preceding backslash stripSlashes removes when parsing.
	allowedCPEPunctuation = "-!\"#$%&'()+,./:;<=>@[]^`{|}~"
)
    16  
// cpeRegexString is the pattern used by ValidateString to accept or reject CPE strings.
// It is taken from https://csrc.nist.gov/schema/cpe/2.3/cpe-naming_2.3.xsd which has the official CPE spec.
// The first alternative matches CPE URIs and the second alternative matches formatted-string bindings.
//
// NOTE(review): as written, '^' applies only to the first alternative and '$' only to the second
// (the '|' sits between the two top-level groups), so the URI form is not anchored at the end and the
// binding form is not anchored at the start -- confirm this is intended.
const cpeRegexString = ((`^([c][pP][eE]:/[AHOaho]?(:[A-Za-z0-9\._\-~%]*){0,6})`) +
	// Or match the CPE binding string
	// Note that we had to replace '`' with '\x60' to escape the backticks
	`|(cpe:2\.3:[aho\*\-](:(((\?*|\*?)([a-zA-Z0-9\-\._]|(\\[\\\*\?!"#$$%&'\(\)\+,/:;<=>@\[\]\^\x60\{\|}~]))+(\?*|\*?))|[\*\-])){5}(:(([a-zA-Z]{2,3}(-([a-zA-Z]{2}|[0-9]{3}))?)|[\*\-]))(:(((\?*|\*?)([a-zA-Z0-9\-\._]|(\\[\\\*\?!"#$$%&'\(\)\+,/:;<=>@\[\]\^\x60\{\|}~]))+(\?*|\*?))|[\*\-])){4})$`)
    24  
// cpeRegex is cpeRegexString compiled once at package initialization; MustCompile panics if the
// pattern is invalid.
var cpeRegex = regexp.MustCompile(cpeRegexString)
    26  
    27  // New will parse a formatted CPE string and return a CPE object. Some input, such as the existence of whitespace
    28  // characters is allowed, however, a more strict validation is done after this sanitization process.
    29  func New(cpeStr string) (CPE, error) {
    30  	// get a CPE object based on the given string --don't validate yet since it may be possible to escape select cases on the callers behalf
    31  	c, err := newWithoutValidation(cpeStr)
    32  	if err != nil {
    33  		return CPE{}, fmt.Errorf("unable to parse CPE string: %w", err)
    34  	}
    35  
    36  	// ensure that this CPE can be validated after being fully sanitized
    37  	if ValidateString(String(c)) != nil {
    38  		return CPE{}, err
    39  	}
    40  
    41  	// we don't return the sanitized string, as this is a concern for later when creating CPE strings. In fact, since
    42  	// sanitization is lossy (whitespace is replaced, not escaped) it's important that the raw values are left as.
    43  	return c, nil
    44  }
    45  
    46  // Must returns a CPE or panics if the provided string is not valid
    47  func Must(cpeStr string) CPE {
    48  	c, err := New(cpeStr)
    49  	if err != nil {
    50  		panic(err)
    51  	}
    52  	return c
    53  }
    54  
    55  func ValidateString(cpeStr string) error {
    56  	// We should filter out all CPEs that do not match the official CPE regex
    57  	// The facebook nvdtools parser can sometimes incorrectly parse invalid CPE strings
    58  	if !cpeRegex.MatchString(cpeStr) {
    59  		return fmt.Errorf("failed to parse CPE=%q as it doesn't match the regex=%s", cpeStr, cpeRegexString)
    60  	}
    61  	return nil
    62  }
    63  
    64  func newWithoutValidation(cpeStr string) (CPE, error) {
    65  	value, err := wfn.Parse(cpeStr)
    66  	if err != nil {
    67  		return CPE{}, fmt.Errorf("failed to parse CPE=%q: %w", cpeStr, err)
    68  	}
    69  
    70  	if value == nil {
    71  		return CPE{}, fmt.Errorf("failed to parse CPE=%q", cpeStr)
    72  	}
    73  
    74  	// we need to compare the raw data since we are constructing CPEs in other locations
    75  	value.Vendor = normalizeField(value.Vendor)
    76  	value.Product = normalizeField(value.Product)
    77  	value.Language = normalizeField(value.Language)
    78  	value.Version = normalizeField(value.Version)
    79  	value.TargetSW = normalizeField(value.TargetSW)
    80  	value.Part = normalizeField(value.Part)
    81  	value.Edition = normalizeField(value.Edition)
    82  	value.Other = normalizeField(value.Other)
    83  	value.SWEdition = normalizeField(value.SWEdition)
    84  	value.TargetHW = normalizeField(value.TargetHW)
    85  	value.Update = normalizeField(value.Update)
    86  
    87  	return *value, nil
    88  }
    89  
    90  func normalizeField(field string) string {
    91  	// replace spaces with underscores (per section 5.3.2 of the CPE spec v 2.3)
    92  	field = strings.ReplaceAll(field, " ", "_")
    93  
    94  	// keep dashes and forward slashes unescaped
    95  	if field == "*" {
    96  		return wfn.Any
    97  	}
    98  	return stripSlashes(field)
    99  }
   100  
   101  // stripSlashes is a reverse of the sanitize function below.
   102  // It correctly removes slashes that are followed by allowed puncts.
   103  // This is to allow for a correct round trip parsing of cpes with quoted characters.
   104  func stripSlashes(s string) string {
   105  	sb := strings.Builder{}
   106  	for i, c := range s {
   107  		if c == '\\' && i+1 < len(s) && strings.ContainsRune(allowedCPEPunctuation, rune(s[i+1])) {
   108  			continue
   109  		}
   110  		sb.WriteRune(c)
   111  	}
   112  	return sb.String()
   113  }
   114  
   115  func String(c CPE) string {
   116  	output := CPE{}
   117  	output.Vendor = sanitize(c.Vendor)
   118  	output.Product = sanitize(c.Product)
   119  	output.Language = sanitize(c.Language)
   120  	output.Version = sanitize(c.Version)
   121  	output.TargetSW = sanitize(c.TargetSW)
   122  	output.Part = sanitize(c.Part)
   123  	output.Edition = sanitize(c.Edition)
   124  	output.Other = sanitize(c.Other)
   125  	output.SWEdition = sanitize(c.SWEdition)
   126  	output.TargetHW = sanitize(c.TargetHW)
   127  	output.Update = sanitize(c.Update)
   128  	return output.BindToFmtString()
   129  }
   130  
   131  // sanitize is a modified version of WFNize function from nvdtools
   132  // that quotes all the allowed punctation chars with a slash and replaces
   133  // spaces with underscores. It differs from the upstream implmentation as
   134  // it does not use the buggy nvdtools implementation, specifically the "addSlashesAt" part of the
   135  // function which stops the loop as soon as it encounters ":" a valid
   136  // character for a WFN attribute after quoting, but the way nvdtools
   137  // handles it causes it to truncate strings that container ":". As a result
   138  // strings like "prefix:1.2" which would have been quoted as "prefix\:1.2"
   139  // end up becoming "prefix" instead causing loss of information and
   140  // incorrect CPEs being generated.
   141  func sanitize(s string) string {
   142  	// replace spaces with underscores
   143  	in := strings.ReplaceAll(s, " ", "_")
   144  
   145  	// escape allowable punctuation per section 5.3.2 in the CPE 2.3 spec
   146  	sb := strings.Builder{}
   147  	for _, c := range in {
   148  		if strings.ContainsRune(allowedCPEPunctuation, c) {
   149  			sb.WriteRune('\\')
   150  		}
   151  		sb.WriteRune(c)
   152  	}
   153  	return sb.String()
   154  }