github.com/blend/go-sdk@v1.20220411.3/env/parse.go (about) 1 /* 2 3 Copyright (c) 2022 - Present. Blend Labs, Inc. All rights reserved 4 Use of this source code is governed by a MIT license that can be found in the LICENSE file. 5 6 */ 7 8 package env 9 10 import ( 11 "fmt" 12 "strings" 13 "unicode" 14 15 "github.com/blend/go-sdk/ex" 16 ) 17 18 // Parse uses a state machine to parse an input string into the `Vars` type. 19 // It uses a default pair delimiter of ';'. 20 func Parse(s string) (Vars, error) { 21 return ParsePairDelimiter(s, PairDelimiterSemicolon) 22 } 23 24 // ParsePairDelimiter uses a state machine to parse an input string into the `Vars` type. 25 // The user can choose which delimiter to use between key-value pairs. 26 // 27 // An example of this format: 28 // 29 // ENV_VAR_1=VALUE_1;ENV_VAR_2=VALUE_2; 30 // 31 // We define the grammar as such (in BNF notation): 32 // <expr> ::= (<pair> <sep>)* <pair> 33 // <sep> ::= ';' 34 // | ',' 35 // <pair> ::= <term> = <term> 36 // <term> ::= <literal> 37 // | "[<literal>|<space>|<escape_quote>]*" 38 // <literal> ::= [-A-Za-z_0-9]+ 39 // <space> ::= ' ' 40 // <escape_quote> ::= '\"' 41 func ParsePairDelimiter(s string, pairDelimiter PairDelimiter) (Vars, error) { 42 ret := make(Vars) 43 var key string 44 var buffer []rune 45 state := rootState 46 47 // indicates whether the value delimiter has been encountered for the current pair 48 var exists, valueFlag bool 49 50 for _, c := range s { 51 // The explanations for each state and what actions should occur in the 52 // DFA are found in the comments for each enum 53 switch state { 54 case rootState: 55 // In the case where we have a key=value pair, we want to add that 56 // to the map and clear out our buffers 57 switch c { 58 case pairDelimiter: 59 if _, exists = ret[key]; exists { 60 return ret, ex.New(fmt.Sprintf("Duplicate keys are not allowed (%s)", key)) 61 } 62 63 if len(key) == 0 { 64 return ret, ex.New("Empty keys are not allowed") 65 } 66 67 // This means that we have a term with no '=', which is illegal 68 if !valueFlag { 69 return ret, ex.New("Expected '='") 70 } 71 72 ret[key] = string(buffer) 73 74 // clear out the buffers and start over 75 buffer = nil 76 key = "" 77 valueFlag = false 78 continue 79 case escapeDelimiter: 80 state = escapeState 81 continue 82 case valueDelimiter: 83 state = valueState 84 continue 85 case quoteDelimiter: 86 state = quotedState 87 continue 88 default: 89 if unicode.IsSpace(c) { 90 continue 91 } 92 buffer = append(buffer, c) 93 continue 94 } 95 case escapeState: 96 buffer = append(buffer, c) 97 state = rootState 98 case valueState: 99 if len(buffer) == 0 { 100 return ret, ex.New("Empty keys are not allowed") 101 } 102 key = string(buffer) 103 buffer = nil 104 valueFlag = true 105 106 if c == quoteDelimiter { 107 state = quotedState 108 } else { 109 if !unicode.IsSpace(c) { 110 buffer = append(buffer, c) 111 } 112 state = rootState 113 } 114 case quotedState: 115 if c == escapeDelimiter { 116 // ignore the escape and continue 117 state = quotedLiteralState 118 } else if c == quoteDelimiter { 119 state = rootState 120 } else { 121 buffer = append(buffer, c) 122 } 123 case quotedLiteralState: 124 // Escape literal within a quote, goes back to quote mode 125 buffer = append(buffer, c) 126 state = quotedState 127 } 128 } 129 130 // State 0 is the only valid ending state. If this is not the case, then 131 // show the user a parsing error. In the event the input wasn't terminated, 132 // we can mitigate by taking the last key-val pair from the buffers. 133 switch state { 134 case rootState: 135 // This handles the case where the key-value pair doesn't have a 136 // separator (which is valid grammar). We could go about the option of 137 // inserting an extra separator, but that is difficult to do as a 138 // preprocessing step because you could have a scenario where there are 139 // trailing spaces, or even an escaped ending delimiter. 140 if len(buffer) > 0 || len(key) > 0 { 141 if !valueFlag { 142 return ret, ex.New("Expected '='") 143 } 144 ret[key] = string(buffer) 145 } 146 case escapeState: 147 return ret, ex.New("Ended input on an escape delimiter ('\\')") 148 case valueState: 149 return ret, ex.New("Failed to assign a value to some key") 150 case quotedState: 151 return ret, ex.New("Unclosed quote") 152 case quotedLiteralState: 153 return ret, ex.New("Ended input on an escape delimiter ('\\')") 154 } 155 return ret, nil 156 } 157 158 const ( 159 // valueDelimiter ("=") is the delimiter between a key and a value for an 160 // environment variable. 161 valueDelimiter rune = '=' 162 163 // quoteDelimiter (`"`) is a delimiter indicating a string literal. This 164 // gives the user the option to have spaces, for example, in their 165 // environment variable values. 166 quoteDelimiter rune = '"' 167 168 // escapeDelimiter ("\") is used to escape the next character so it is 169 // accepted as a part of the input value. 170 escapeDelimiter rune = '\\' 171 ) 172 173 // dfaState is a wrapper type for the standard enum integer type, representing 174 // the state of the parsing table for the DFA. We create a new type so that we 175 // can use a switch case on this particular enum type and not worry about 176 // accidentally setting the state to an invalid value. 177 type dfaState int 178 179 const ( 180 // rootState is the "default" starting state state. It processes text 181 // normally, performing actions on tokens and excluding whitespace. 182 rootState dfaState = iota 183 184 // escapeState represents the state encountered after the parser processes 185 // the escape delimiter. The next character will be stored in the buffer no 186 // matter what, and no actions will be dispatched, even if the next 187 // character is a token. 188 escapeState dfaState = iota 189 190 // valueState is the state encountered after encountering the value 191 // delimiter ('='). Being in this state indicates that buffer is no longer 192 // storing values for the key. 193 valueState dfaState = iota 194 195 // quotedState is the state encountered after the parser encounters a 196 // quote. This means that all characters except for the literal escape 197 // value will be input into the buffer. 198 quotedState dfaState = iota 199 200 // quotedLiteralState is invoked after the parser encounters 201 // a `quoteDelimiter` from `quotedState`. 202 quotedLiteralState dfaState = iota 203 ) 204 205 // PairDelimiter is a type of delimiter that separates different env var key-value pairs 206 type PairDelimiter = rune 207 208 const ( 209 // PairDelimiterSemicolon (";") is a delimiter between key-value pairs 210 PairDelimiterSemicolon PairDelimiter = ';' 211 212 // PairDelimiterComma (",") is a delimiter betewen key-value pairs 213 PairDelimiterComma PairDelimiter = ',' 214 ) 215 216 // DelimitedString converts environment variables to a particular string 217 // representation, allowing the user to specify which delimiter to use between 218 // different environment variable pairs. 219 func (ev Vars) DelimitedString(separator PairDelimiter) string { 220 var serializedPairs []string 221 222 // For each key, value pair, convert it into a "key=value;" pair and 223 // continue appending to the output string for each pair 224 for k, v := range ev { 225 if k != "" { 226 var pair []rune 227 pair = append(pair, quoteDelimiter) 228 pair = append(pair, []rune(escapeString(k, separator))...) 229 pair = append(pair, quoteDelimiter) 230 pair = append(pair, valueDelimiter) 231 pair = append(pair, quoteDelimiter) 232 pair = append(pair, []rune(escapeString(v, separator))...) 233 pair = append(pair, quoteDelimiter) 234 235 serializedPairs = append(serializedPairs, string(pair)) 236 } 237 } 238 return strings.Join(serializedPairs, string(separator)) 239 } 240 241 // isToken returns whether a string is a special token that would need to be 242 // escaped 243 func isPairDelimiter(c rune, delimiter PairDelimiter) bool { 244 switch c { 245 case delimiter, 246 valueDelimiter, 247 quoteDelimiter, 248 escapeDelimiter: 249 return true 250 } 251 return false 252 } 253 254 // escapeString takes an string and escapes any special characters so that the 255 // string can be serialized properly. The user must supply the delimiter used 256 // to separate key-value pairs. 257 func escapeString(s string, delimiter PairDelimiter) string { 258 var escaped []rune 259 for _, c := range s { 260 if isPairDelimiter(c, delimiter) { 261 escaped = append(escaped, escapeDelimiter) 262 } 263 escaped = append(escaped, c) 264 } 265 return string(escaped) 266 }