// © Broadcom. All Rights Reserved.
// The term “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.
// SPDX-License-Identifier: Apache-2.0

package ovf

import (
	"math"
	"math/bits"
	"regexp"
	"strconv"
	"strings"
)

// These are used to validate the overall structure of the string being parsed and to differentiate tokens as we are
// processing them
var (
	blankRegexp         = regexp.MustCompile(`[[:blank:]]`)
	validIntegerRegexp  = regexp.MustCompile(`^([1-9]\d*)$`)
	validExponentRegexp = regexp.MustCompile(`^([1-9]\d*\^[1-9]\d*)$`)
	// Anchored at both ends so that only the exact keys of prefixMultipliers (optionally pluralised) match
	validByteUnitRegexp = regexp.MustCompile(`^(kilo|kibi|mega|mebi|giga|gibi)?bytes?$`)
	validCapacityRegexp = regexp.MustCompile(`^[[:blank:]]*((([1-9]\d*\^[1-9]\d*)|([1-9]\d*))($|[[:blank:]]*\*[[:blank:]]*))*(([a-zA-Z]*(b|B)(y|Y)(t|T)(e|E)(s|S)?)($|([[:blank:]]*\*[[:blank:]]*(([1-9]\d*\^[1-9]\d*)|([1-9]\d*)))*))?$`)
)

// We only handle kilo, kibi, mega, mebi, giga, gibi prefixes due to size constraints of int64/uint64, but more
// importantly because prefixes larger than giga & gibi don't make sense for our use-case
var prefixMultipliers = map[string]int64{
	"byte":     1,                      // byte
	"kilobyte": 1 * 1000,               // byte * 1000
	"kibibyte": 1 * 1024,               // byte * 1024
	"megabyte": 1 * 1000 * 1000,        // byte * 1000 * 1000 = kilobyte * 1000
	"mebibyte": 1 * 1024 * 1024,        // byte * 1024 * 1024 = kibibyte * 1024
	"gigabyte": 1 * 1000 * 1000 * 1000, // byte * 1000 * 1000 * 1000 = kilobyte * 1000 * 1000 = megabyte * 1000
	"gibibyte": 1 * 1024 * 1024 * 1024, // byte * 1024 * 1024 * 1024 = kibibyte * 1024 * 1024 = mebibyte * 1024
}

// ParseCapacityAllocationUnits validates the string s is a valid programmatic unit with respect to the base unit 'byte'
// and parses the string to return the number of bytes s represents.
//
// Zero is returned when s is not a valid programmatic unit (see validCapacityString), when it uses a unit prefix that
// is not listed in prefixMultipliers, or when the resulting number of bytes cannot be represented by an int64. The
// empty string is treated as "byte" and yields 1.
func ParseCapacityAllocationUnits(s string) int64 {
	// Any strings which don't match against the regular expression are deemed invalid and zero is returned as the
	// result
	if !validCapacityString(s) {
		return 0
	}

	// Remove any whitespace in s and lowercase any alphabetic characters. Removal of whitespace is done after
	// validating against the regular expression because whitespace is valid for the most part, but is not valid
	// for exponential terms, e.g 2 ^ 10
	s = strings.ToLower(blankRegexp.ReplaceAllString(s, ""))

	// Split s on multiplication operator (*) so that we can just calculate integer multipliers. Each token will
	// then be either an integer, an exponential term to be converted to an integer, or a unit term to be converted
	// to an integer. All arithmetic is done on integers with explicit overflow checks: going through float64 would
	// silently lose precision above 2^53 and has implementation-defined results once the value exceeds int64.
	var capacityBytes int64 = 1
	for _, token := range strings.Split(s, "*") {
		var factor int64
		switch {
		case token == "":
			// "" should be treated identically to "byte". capacityBytes is already set to 1 so there is
			// nothing to do
			continue
		case validByteUnitString(token):
			factor = prefixMultipliers[strings.TrimSuffix(token, "s")]
		case validExponentString(token):
			p := strings.SplitN(token, "^", 2)
			b, err := strconv.ParseInt(p[0], 10, 64)
			if err != nil {
				// The base alone does not fit in an int64, so neither can base^exponent (exponent >= 1)
				return 0
			}
			e, err := strconv.ParseInt(p[1], 10, 64)
			if err != nil {
				// The token is known to consist of digits only, so the error can only be a range error.
				// Clamping keeps the result correct: 1^e is still 1 and any larger base overflows.
				e = math.MaxInt64
			}
			n, ok := powInt64(b, e)
			if !ok {
				return 0
			}
			factor = n
		case validIntegerString(token):
			n, err := strconv.ParseInt(token, 10, 64)
			if err != nil {
				return 0
			}
			factor = n
		default:
			// Reached for unit terms with an unsupported prefix (e.g. "terabyte"): validCapacityString
			// accepts any alphabetic prefix in front of "byte", but only those in prefixMultipliers have a
			// meaning here
			return 0
		}

		product, ok := mulInt64(capacityBytes, factor)
		if !ok {
			return 0
		}
		capacityBytes = product
	}
	return capacityBytes
}

// mulInt64 multiplies two non-negative int64 values. The second result is false if either operand is negative or if
// the product does not fit in an int64.
func mulInt64(a, b int64) (int64, bool) {
	if a < 0 || b < 0 {
		return 0, false
	}
	hi, lo := bits.Mul64(uint64(a), uint64(b))
	if hi != 0 || lo > math.MaxInt64 {
		return 0, false
	}
	return int64(lo), true
}

// powInt64 computes base raised to the power exp exactly, using exponentiation by squaring. The second result is
// false if base or exp is negative or if the result does not fit in an int64.
func powInt64(base, exp int64) (int64, bool) {
	if base < 0 || exp < 0 {
		return 0, false
	}
	result := int64(1)
	for exp > 0 {
		var ok bool
		if exp&1 == 1 {
			if result, ok = mulInt64(result, base); !ok {
				return 0, false
			}
		}
		exp >>= 1
		// Only square while bits remain; otherwise the final, unused square could report a spurious overflow
		if exp > 0 {
			if base, ok = mulInt64(base, base); !ok {
				return 0, false
			}
		}
	}
	return result, true
}

// validIntegerString matches the string s against the regular expression `^([1-9]\d*)$`; i.e. s should be of the form:
// any non-zero digit ([1-9]), followed by zero or more digits (\d*)
func validIntegerString(s string) bool {
	return validIntegerRegexp.MatchString(s)
}

// validExponentString matches the string s against the regular expression `^([1-9]\d*\^[1-9]\d*)$`; i.e. s should be of
// the form: any non-zero digit ([1-9]), followed by zero or more digits (\d*), followed by a caret (^), followed by any
// non-zero digit ([1-9]), followed by zero or more digits (\d*)
func validExponentString(s string) bool {
	return validExponentRegexp.MatchString(s)
}

// validByteUnitString matches the string s against a regular expression which only allows a unit of byte
// (optionally plural) with a valid decimal or binary prefix. See prefixMultipliers. The match is case-sensitive; s is
// expected to have been lowercased already.
func validByteUnitString(s string) bool {
	return validByteUnitRegexp.MatchString(s)
}

// validCapacityString matches the string s against the regular expression validCapacityRegexp and verifies that s is a
// valid programmatic unit with respect to the base unit 'byte'.
//
// Per the OVF schema defined in DSP8023: "If not specified default value is bytes. Value shall match a recognized value
// for the UNITS qualifier in DSP0004"
//
// DSP0004 defines a programmatic unit as:
//
//	programmatic-unit = [ sign ] *S unit-element *( *S unit-operator *S unit-element )
//	sign = HYPHEN
//	unit-element = number / [ prefix ] base-unit [ CARET exponent ]
//	unit-operator = "*" / "/"
//	number = floatingpoint-number / exponent-number
//
//	; An exponent shall be interpreted as a floating point number
//	; with the specified decimal base and exponent and a mantissa of 1
//	exponent-number = base CARET exponent
//	base = integer-number
//	exponent = [ sign ] integer-number
//
//	; An integer shall be interpreted as a decimal integer number
//	integer-number = NON-ZERO-DIGIT *( DIGIT )
//
//	; A float shall be interpreted as a decimal floating point number
//	floatingpoint-number = 1*( DIGIT ) [ "." ] *( DIGIT )
//
//	; A prefix for a base unit (e.g. "kilo"). The numeric equivalents of
//	; these prefixes shall be interpreted as multiplication factors for the
//	; directly succeeding base unit. In other words, if a prefixed base
//	; unit is in the denominator of the overall programmatic unit, the
//	; numeric equivalent of that prefix is also in the denominator
//	prefix = decimal-prefix / binary-prefix
//
//	; SI decimal prefixes as defined in ISO 1000
//	decimal-prefix =
//
//	  "deca"    ; 10^1
//	  / "hecto" ; 10^2
//	  / "kilo"  ; 10^3
//	  / "mega"  ; 10^6
//	  / "giga"  ; 10^9
//	  / "tera"  ; 10^12
//	  / "peta"  ; 10^15
//	  / "exa"   ; 10^18
//	  / "zetta" ; 10^21
//	  / "yotta" ; 10^24
//	  / "deci"  ; 10^-1
//	  / "centi" ; 10^-2
//	  / "milli" ; 10^-3
//	  / "micro" ; 10^-6
//	  / "nano"  ; 10^-9
//	  / "pico"  ; 10^-12
//	  / "femto" ; 10^-15
//	  / "atto"  ; 10^-18
//	  / "zepto" ; 10^-21
//	  / "yocto" ; 10^-24
//
//	; IEC binary prefixes as defined in IEC 80000-13
//	binary-prefix =
//
//	  "kibi"   ; 2^10
//	  / "mebi" ; 2^20
//	  / "gibi" ; 2^30
//	  / "tebi" ; 2^40
//	  / "pebi" ; 2^50
//	  / "exbi" ; 2^60
//	  / "zebi" ; 2^70
//	  / "yobi" ; 2^80
//
//	; The name of a base unit
//	base-unit = standard-unit / extension-unit
//
//	; The name of a standard base unit
//	standard-unit = UNIT-IDENTIFIER
//
//	; The name of an extension base unit. If UNIT-IDENTIFIER begins with a
//	; prefix (see prefix ABNF rule), the meaning of that prefix shall not be
//	; changed by the extension base unit (examples of this for standard base
//	; units are "decibel" or "kilogram")
//	; extension-unit = org-id COLON UNIT-IDENTIFIER
//
//	; org-id shall include a copyrighted, trademarked, or otherwise unique
//	; name that is owned by the business entity that is defining the
//	; extension unit, or that is a registered ID assigned to the business
//	; entity by a recognized global authority. org-id shall not begin with
//	; a prefix (see prefix ABNF rule)
//	org-id = UNIT-IDENTIFIER
//	UNIT-IDENTIFIER = FIRST-UNIT-CHAR [ *( MID-UNIT-CHAR )
//	LAST-UNIT-CHAR ]
//	FIRST-UNIT-CHAR = UPPERALPHA / LOWERALPHA / UNDERSCORE
//	LAST-UNIT-CHAR = FIRST-UNIT-CHAR / DIGIT / PARENS
//	MID-UNIT-CHAR = LAST-UNIT-CHAR / HYPHEN / S
//
//	DIGIT = ZERO / NON-ZERO-DIGIT
//	ZERO = "0"
//	NON-ZERO-DIGIT = "1"-"9"
//	HYPHEN = U+002D ; "-"
//	CARET = U+005E ; "^"
//	COLON = U+003A ; ":"
//	UPPERALPHA = U+0041-005A ; "A" ... "Z"
//	LOWERALPHA = U+0061-007A ; "a" ... "z"
//	UNDERSCORE = U+005F ; "_"
//	PARENS = U+0028 / U+0029 ; "(", ")"
//	S = U+0020 ; " "
//
// This definition is further restricted as such a broad definition by the above grammar does not make sense in the
// context of virtual disk capacity.
//
// We do not allow for negative values, division operations, floating-point numbers, negative exponents, nor the use of
// multiple units. Furthermore, we limit the allowed decimal and binary prefixes. This gives us:
//
//	programmatic-unit =
//
//	  number
//	  / [prefix] base-unit
//	  / number *( *S unit-operator *S number) *S unit-operator *S [prefix] base-unit
//	  / [prefix] base-unit *( *S unit-operator *S number)
//	  / number *( *S unit-operator *S number) *S unit-operator *S [prefix] base-unit *( *S unit-operator *S number)
//
//	unit-operator = "*"
//	number = integer-number / exponent-number
//	exponent-number = base CARET exponent
//	base = integer-number
//	exponent = integer-number
//	integer-number = NON-ZERO-DIGIT *( DIGIT )
//	prefix = decimal-prefix / binary-prefix
//
//	decimal-prefix =
//
//	  "kilo"   ; 10^3
//	  / "mega" ; 10^6
//	  / "giga" ; 10^9
//
//	binary-prefix =
//
//	  "kibi"   ; 2^10
//	  / "mebi" ; 2^20
//	  / "gibi" ; 2^30
//
// This function and the regular expression validCapacityRegexp are used to verify that the string we are parsing follows
// our above restricted grammar. Note that validCapacityRegexp itself accepts any alphabetic prefix in front of "byte";
// restricting the prefix to the supported set is left to validByteUnitString.
func validCapacityString(s string) bool {
	if !validCapacityRegexp.MatchString(s) {
		return false
	}
	// A number followed by a dangling '*' is not rejected by the regular expression and so is explicitly checked.
	// The regular expression also consumes blanks after the operator, so they are trimmed first; otherwise "2 * "
	// would slip through.
	return !strings.HasSuffix(strings.TrimRight(s, " \t"), "*")
}