github.com/vmware/govmomi@v0.51.0/ovf/parser.go (about)

     1  // © Broadcom. All Rights Reserved.
     2  // The term “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.
     3  // SPDX-License-Identifier: Apache-2.0
     4  
     5  package ovf
     6  
     7  import (
     8  	"math"
     9  	"regexp"
    10  	"strconv"
    11  	"strings"
    12  )
    13  
    14  // These are used to validate the overall structure of the string being parsed and to differentiate tokens as we are
    15  // processing them
    16  var (
    17  	blankRegexp         = regexp.MustCompile(`[[:blank:]]`)
    18  	validIntegerRegexp  = regexp.MustCompile(`^([1-9]\d*)$`)
    19  	validExponentRegexp = regexp.MustCompile(`^([1-9]\d*\^[1-9]\d*)$`)
    20  	validByteUnitRegexp = regexp.MustCompile(`((^|kilo|kibi|mega|mebi|giga|gibi)byte(s?)$)`)
    21  	validCapacityRegexp = regexp.MustCompile(`^[[:blank:]]*((([1-9]\d*\^[1-9]\d*)|([1-9]\d*))($|[[:blank:]]*\*[[:blank:]]*))*(([a-zA-Z]*(b|B)(y|Y)(t|T)(e|E)(s|S)?)($|([[:blank:]]*\*[[:blank:]]*(([1-9]\d*\^[1-9]\d*)|([1-9]\d*)))*))?$`)
    22  )
    23  
    24  // We only handle kilo, kibi, mega, mebi, giga, gibi prefixes due to size constraints of int64/uint64, but more
    25  // importantly because prefixes larger than giga & gibi don't make sense for our use-case
    26  var prefixMultipliers = map[string]int64{
    27  	"byte":     1,                      // byte
    28  	"kilobyte": 1 * 1000,               // byte * 1000
    29  	"kibibyte": 1 * 1024,               // byte * 1024
    30  	"megabyte": 1 * 1000 * 1000,        // byte * 1000 * 1000 = kilobyte * 1000
    31  	"mebibyte": 1 * 1024 * 1024,        // byte * 1024 * 1024 = kibibyte * 1024
    32  	"gigabyte": 1 * 1000 * 1000 * 1000, // byte * 1000 * 1000 * 1000 = kilobyte * 1000 * 1000 = megabyte * 1000
    33  	"gibibyte": 1 * 1024 * 1024 * 1024, // byte * 1024 * 1024 * 1024 = kibibyte * 1024 * 1024 = mebibyte * 1024
    34  }
    35  
    36  // ParseCapacityAllocationUnits validates the string s is a valid programmatic unit with respect to the base unit 'byte'
    37  // and parses the string to return the number of bytes s represents
    38  func ParseCapacityAllocationUnits(s string) int64 {
    39  	// Any strings which don't match against the regular expression are deemed invalid and zero is returned as the result
    40  	if !validCapacityString(s) {
    41  		return 0
    42  	}
    43  	var capacityBytes int64 = 1
    44  	// Remove any whitespace in s and lowercase any alphabetic characters. Removal of whitespace is done after
    45  	// validating against the regular expression because whitespace is valid for the most part, but is not valid
    46  	// for exponential terms, e.g 2 ^ 10
    47  	s = strings.ToLower(blankRegexp.ReplaceAllString(s, ""))
    48  	// Split s on multiplication operator (*) so that we can just calculate integer multipliers. Each token will
    49  	// then be either an integer, an exponential term to be converted to an integer, or a unit term to be converted
    50  	// to an integer
    51  	tokens := strings.Split(s, "*")
    52  
    53  	// Loop through all tokens and convert any to integers if necessary and use to compute a running product
    54  	for _, token := range tokens {
    55  		switch {
    56  		// "" should be treated identically to "byte". capacityBytes is already set to 1 so there is nothing to do
    57  		case len(token) == 0:
    58  			continue
    59  		case validByteUnitString(token):
    60  			capacityBytes = capacityBytes * prefixMultipliers[strings.TrimSuffix(token, "s")]
    61  		case validExponentString(token):
    62  			p := strings.Split(token, "^")
    63  			b, _ := strconv.ParseInt(p[0], 10, 64)
    64  			e, _ := strconv.ParseInt(p[1], 10, 64)
    65  			capacityBytes = capacityBytes * int64(math.Pow(float64(b), float64(e)))
    66  		case validIntegerString(token):
    67  			n, _ := strconv.ParseInt(token, 10, 64)
    68  			capacityBytes = capacityBytes * n
    69  		default:
    70  			// This should be unreachable. validCapacityString should have filtered out anything that cannot be
    71  			// matched by the non-default cases
    72  			capacityBytes = 0
    73  		}
    74  	}
    75  	return capacityBytes
    76  }
    77  
    78  // validIntegerString matches the string s against the regular expression `^([1-9]\d*)$`; i.e. s should be of the form:
    79  // any non-zero digit ([1-9]), followed by zero or more digits (\d*)
    80  func validIntegerString(s string) bool {
    81  	return validIntegerRegexp.MatchString(s)
    82  }
    83  
    84  // validExponentString matches the string s against the regular expression `^([1-9]\d*\^[1-9]\d*)$`; i.e. s should be of
    85  // the form: any non-zero digit ([1-9]), followed by a caret (^) followed by any non-zero digit ([1-9]), followed by zero
    86  // or more digits (\d*)
    87  func validExponentString(s string) bool {
    88  	return validExponentRegexp.MatchString(s)
    89  }
    90  
    91  // validByteUnitString matches the string s against a regular expression which only allows a unit of byte
    92  // (optionally plural) with a valid decimal or binary prefix. See prefixMultipliers
    93  func validByteUnitString(s string) bool {
    94  	return validByteUnitRegexp.MatchString(s)
    95  }
    96  
    97  // validCapacityString matches the string s against the regular expression validCapacityRegexp and verifies that s is a
    98  // valid programmatic unit with respect to the base unit 'byte'.
    99  //
   100  // Per the OVF schema defined in DSP8023: "If not specified default value is bytes. Value shall match a recognized value
   101  // for the UNITS qualifier in DSP0004"
   102  //
   103  // DSP0004 defines a programmatic unit as:
   104  //
   105  // programmatic-unit = [ sign ] *S unit-element *( *S unit-operator *S unit-element )
   106  // sign = HYPHEN
   107  // unit-element = number / [ prefix ] base-unit [ CARET exponent ]
   108  // unit-operator = "*" / "/"
   109  // number = floatingpoint-number / exponent-number
   110  //
   111  // ; An exponent shall be interpreted as a floating point number
   112  // ; with the specified decimal base and exponent and a mantissa of 1
   113  // exponent-number = base CARET exponent
   114  // base = integer-number
   115  // exponent = [ sign ] integer-number
   116  //
   117  // ; An integer shall be interpreted as a decimal integer number
   118  // integer-number = NON-ZERO-DIGIT *( DIGIT )
   119  //
   120  // ; A float shall be interpreted as a decimal floating point number
   121  // floatingpoint-number = 1*( DIGIT ) [ "." ] *( DIGIT )
   122  //
   123  // ; A prefix for a base unit (e.g. "kilo"). The numeric equivalents of
   124  // ; these prefixes shall be interpreted as multiplication factors for the
   125  // ; directly succeeding base unit. In other words, if a prefixed base
   126  // ; unit is in the denominator of the overall programmatic unit, the
   127  // ; numeric equivalent of that prefix is also in the denominator
   128  // prefix = decimal-prefix / binary-prefix
   129  //
   130  // ; SI decimal prefixes as defined in ISO 1000
   131  // decimal-prefix =
   132  //
   133  //	  "deca" ; 10^1
   134  //	/ "hecto" ; 10^2
   135  //	/ "kilo" ; 10^3
   136  //	/ "mega" ; 10^6
   137  //	/ "giga" ; 10^9
   138  //	/ "tera" ; 10^12
   139  //	/ "peta" ; 10^15
   140  //	/ "exa" ; 10^18
   141  //	/ "zetta" ; 10^21
   142  //	/ "yotta" ; 10^24
   143  //	/ "deci" ; 10^-1
   144  //	/ "centi" ; 10^-2
   145  //	/ "milli" ; 10^-3
   146  //	/ "micro" ; 10^-6
   147  //	/ "nano" ; 10^-9
   148  //	/ "pico" ; 10^-12
   149  //	/ "femto" ; 10^-15
   150  //	/ "atto" ; 10^-18
   151  //	/ "zepto" ; 10^-21
   152  //	/ "yocto" ; 10^-24
   153  //
   154  // ; IEC binary prefixes as defined in IEC 80000-13
   155  // binary-prefix =
   156  //
   157  //	  "kibi" ; 2^10
   158  //	/ "mebi" ; 2^20
   159  //	/ "gibi" ; 2^30
   160  //	/ "tebi" ; 2^40
   161  //	/ "pebi" ; 2^50
   162  //	/ "exbi" ; 2^60
   163  //	/ "zebi" ; 2^70
   164  //	/ "yobi" ; 2^80
   165  //
   166  // ; The name of a base unit
   167  // base-unit = standard-unit / extension-unit
   168  //
   169  // ; The name of a standard base unit
   170  // standard-unit = UNIT-IDENTIFIER
   171  //
   172  // ; The name of an extension base unit. If UNIT-IDENTIFIER begins with a
   173  // ; prefix (see prefix ABNF rule), the meaning of that prefix shall not be
   174  // ; changed by the extension base unit (examples of this for standard base
   175  // ; units are "decibel" or "kilogram")
   176  // extension-unit = org-id COLON UNIT-IDENTIFIER
   177  //
   178  // ; org-id shall include a copyrighted, trademarked, or otherwise unique
   179  // ; name that is owned by the business entity that is defining the
   180  // ; extension unit, or that is a registered ID assigned to the business
   181  // ; entity by a recognized global authority. org-id shall not begin with
   182  // ; a prefix (see prefix ABNF rule)
   183  // org-id = UNIT-IDENTIFIER
   184  // UNIT-IDENTIFIER = FIRST-UNIT-CHAR [ *( MID-UNIT-CHAR )
   185  // LAST-UNIT-CHAR ]
   186  // FIRST-UNIT-CHAR = UPPERALPHA / LOWERALPHA / UNDERSCORE
   187  // LAST-UNIT-CHAR = FIRST-UNIT-CHAR / DIGIT / PARENS
   188  // MID-UNIT-CHAR = LAST-UNIT-CHAR / HYPHEN / S
   189  //
   190  // DIGIT = ZERO / NON-ZERO-DIGIT
   191  // ZERO = "0"
   192  // NON-ZERO-DIGIT = "1"-"9"
   193  // HYPHEN = U+002D ; "-"
   194  // CARET = U+005E ; "^"
   195  // COLON = U+003A ; ":"
   196  // UPPERALPHA = U+0041-005A ; "A" ... "Z"
   197  // LOWERALPHA = U+0061-007A ; "a" ... "z"
   198  // UNDERSCORE = U+005F ; "_"
   199  // PARENS = U+0028 / U+0029 ; "(", ")"
   200  // S = U+0020 ; " "
   201  //
   202  // This definition is further restricted as such a broad definition by the above grammar does not make sense in the
   203  // context of virtual disk capacity.
   204  //
   205  // We do not allow for negative values, division operations, floating-point numbers, negative exponents, nor the use of
   206  // multiple units. Furthermore, we limit the allowed decimal and binary prefixes. This gives us:
   207  //
   208  // programmatic-unit =
   209  //
   210  //	   number
   211  //		/ [prefix] base-unit
   212  //		/ number *( *S unit-operator *S number) *S unit-operator *S [prefix] base-unit
   213  //		/ [prefix] base-unit *( *S unit-operator *S number)
   214  //		/ number *( *S unit-operator *S number) *S unit-operator *S [prefix] base-unit *( *S unit-operator *S number)
   215  //
   216  // unit-operator = "*"
   217  // number = integer-number / exponent-number
   218  // exponent-number = base CARET exponent
   219  // base = integer-number
   220  // exponent = integer-number
   221  // integer-number = NON-ZERO-DIGIT *( DIGIT )
   222  // prefix = decimal-prefix / binary-prefix
   223  //
   224  // decimal-prefix =
   225  //
   226  //	  "kilo" ; 10^3
   227  //	/ "mega" ; 10^6
   228  //	/ "giga" ; 10^9
   229  //
   230  // binary-prefix =
   231  //
   232  //	  "kibi" ; 2^10
   233  //	/ "mebi" ; 2^20
   234  //	/ "gibi" ; 2^30
   235  //
   236  // This function and the regular expression validCapacityRegexp are used to verify that the string we are parsing follows
   237  // our above restricted grammar
   238  func validCapacityString(s string) bool {
   239  	// Integer followed by a trailing '*' is not handled by the regular expression and so is explicitly checked
   240  	return validCapacityRegexp.MatchString(s) && !strings.HasSuffix(s, "*")
   241  }