github.com/blend/go-sdk@v1.20220411.3/env/parse.go (about)

     1  /*
     2  
     3  Copyright (c) 2022 - Present. Blend Labs, Inc. All rights reserved
     4  Use of this source code is governed by a MIT license that can be found in the LICENSE file.
     5  
     6  */
     7  
     8  package env
     9  
import (
	"fmt"
	"sort"
	"strings"
	"unicode"

	"github.com/blend/go-sdk/ex"
)
    17  
    18  // Parse uses a state machine to parse an input string into the `Vars` type.
    19  // It uses a default pair delimiter of ';'.
    20  func Parse(s string) (Vars, error) {
    21  	return ParsePairDelimiter(s, PairDelimiterSemicolon)
    22  }
    23  
    24  // ParsePairDelimiter uses a state machine to parse an input string into the `Vars` type.
    25  // The user can choose which delimiter to use between key-value pairs.
    26  //
    27  // An example of this format:
    28  //
    29  // ENV_VAR_1=VALUE_1;ENV_VAR_2=VALUE_2;
    30  //
    31  // We define the grammar as such (in BNF notation):
    32  // <expr> ::= (<pair> <sep>)* <pair>
    33  // <sep> ::= ';'
    34  //        |  ','
    35  // <pair> ::= <term> = <term>
    36  // <term> ::= <literal>
    37  //         |  "[<literal>|<space>|<escape_quote>]*"
    38  // <literal> ::= [-A-Za-z_0-9]+
    39  // <space> ::= ' '
    40  // <escape_quote> ::= '\"'
    41  func ParsePairDelimiter(s string, pairDelimiter PairDelimiter) (Vars, error) {
    42  	ret := make(Vars)
    43  	var key string
    44  	var buffer []rune
    45  	state := rootState
    46  
    47  	// indicates whether the value delimiter has been encountered for the current pair
    48  	var exists, valueFlag bool
    49  
    50  	for _, c := range s {
    51  		// The explanations for each state and what actions should occur in the
    52  		// DFA are found in the comments for each enum
    53  		switch state {
    54  		case rootState:
    55  			// In the case where we have a key=value pair, we want to add that
    56  			// to the map and clear out our buffers
    57  			switch c {
    58  			case pairDelimiter:
    59  				if _, exists = ret[key]; exists {
    60  					return ret, ex.New(fmt.Sprintf("Duplicate keys are not allowed (%s)", key))
    61  				}
    62  
    63  				if len(key) == 0 {
    64  					return ret, ex.New("Empty keys are not allowed")
    65  				}
    66  
    67  				// This means that we have a term with no '=', which is illegal
    68  				if !valueFlag {
    69  					return ret, ex.New("Expected '='")
    70  				}
    71  
    72  				ret[key] = string(buffer)
    73  
    74  				// clear out the buffers and start over
    75  				buffer = nil
    76  				key = ""
    77  				valueFlag = false
    78  				continue
    79  			case escapeDelimiter:
    80  				state = escapeState
    81  				continue
    82  			case valueDelimiter:
    83  				state = valueState
    84  				continue
    85  			case quoteDelimiter:
    86  				state = quotedState
    87  				continue
    88  			default:
    89  				if unicode.IsSpace(c) {
    90  					continue
    91  				}
    92  				buffer = append(buffer, c)
    93  				continue
    94  			}
    95  		case escapeState:
    96  			buffer = append(buffer, c)
    97  			state = rootState
    98  		case valueState:
    99  			if len(buffer) == 0 {
   100  				return ret, ex.New("Empty keys are not allowed")
   101  			}
   102  			key = string(buffer)
   103  			buffer = nil
   104  			valueFlag = true
   105  
   106  			if c == quoteDelimiter {
   107  				state = quotedState
   108  			} else {
   109  				if !unicode.IsSpace(c) {
   110  					buffer = append(buffer, c)
   111  				}
   112  				state = rootState
   113  			}
   114  		case quotedState:
   115  			if c == escapeDelimiter {
   116  				// ignore the escape and continue
   117  				state = quotedLiteralState
   118  			} else if c == quoteDelimiter {
   119  				state = rootState
   120  			} else {
   121  				buffer = append(buffer, c)
   122  			}
   123  		case quotedLiteralState:
   124  			// Escape literal within a quote, goes back to quote mode
   125  			buffer = append(buffer, c)
   126  			state = quotedState
   127  		}
   128  	}
   129  
   130  	// State 0 is the only valid ending state. If this is not the case, then
   131  	// show the user a parsing error. In the event the input wasn't terminated,
   132  	// we can mitigate by taking the last key-val pair from the buffers.
   133  	switch state {
   134  	case rootState:
   135  		// This handles the case where the key-value pair doesn't have a
   136  		// separator (which is valid grammar). We could go about the option of
   137  		// inserting an extra separator, but that is difficult to do as a
   138  		// preprocessing step because you could have a scenario where there are
   139  		// trailing spaces, or even an escaped ending delimiter.
   140  		if len(buffer) > 0 || len(key) > 0 {
   141  			if !valueFlag {
   142  				return ret, ex.New("Expected '='")
   143  			}
   144  			ret[key] = string(buffer)
   145  		}
   146  	case escapeState:
   147  		return ret, ex.New("Ended input on an escape delimiter ('\\')")
   148  	case valueState:
   149  		return ret, ex.New("Failed to assign a value to some key")
   150  	case quotedState:
   151  		return ret, ex.New("Unclosed quote")
   152  	case quotedLiteralState:
   153  		return ret, ex.New("Ended input on an escape delimiter ('\\')")
   154  	}
   155  	return ret, nil
   156  }
   157  
   158  const (
   159  	// valueDelimiter ("=") is the delimiter between a key and a value for an
   160  	// environment variable.
   161  	valueDelimiter rune = '='
   162  
   163  	// quoteDelimiter (`"`) is a delimiter indicating a string literal. This
   164  	// gives the user the option to have spaces, for example, in their
   165  	// environment variable values.
   166  	quoteDelimiter rune = '"'
   167  
   168  	// escapeDelimiter ("\") is used to escape the next character so it is
   169  	// accepted as a part of the input value.
   170  	escapeDelimiter rune = '\\'
   171  )
   172  
   173  // dfaState is a wrapper type for the standard enum integer type, representing
   174  // the state of the parsing table for the DFA. We create a new type so that we
   175  // can use a switch case on this particular enum type and not worry about
   176  // accidentally setting the state to an invalid value.
   177  type dfaState int
   178  
   179  const (
   180  	// rootState is the "default" starting state state. It processes text
   181  	// normally, performing actions on tokens and excluding whitespace.
   182  	rootState dfaState = iota
   183  
   184  	// escapeState represents the state encountered after the parser processes
   185  	// the escape delimiter. The next character will be stored in the buffer no
   186  	// matter what, and no actions will be dispatched, even if the next
   187  	// character is a token.
   188  	escapeState dfaState = iota
   189  
   190  	// valueState is the state encountered after encountering the value
   191  	// delimiter ('='). Being in this state indicates that buffer is no longer
   192  	// storing values for the key.
   193  	valueState dfaState = iota
   194  
   195  	// quotedState is the state encountered after the parser encounters a
   196  	// quote. This means that all characters except for the literal escape
   197  	// value will be input into the buffer.
   198  	quotedState dfaState = iota
   199  
   200  	// quotedLiteralState is invoked after the parser encounters
   201  	// a `quoteDelimiter` from `quotedState`.
   202  	quotedLiteralState dfaState = iota
   203  )
   204  
   205  // PairDelimiter is a type of delimiter that separates different env var key-value pairs
   206  type PairDelimiter = rune
   207  
   208  const (
   209  	// PairDelimiterSemicolon (";") is a delimiter between key-value pairs
   210  	PairDelimiterSemicolon PairDelimiter = ';'
   211  
   212  	// PairDelimiterComma  (",") is a delimiter betewen key-value pairs
   213  	PairDelimiterComma PairDelimiter = ','
   214  )
   215  
   216  // DelimitedString converts environment variables to a particular string
   217  // representation, allowing the user to specify which delimiter to use between
   218  // different environment variable pairs.
   219  func (ev Vars) DelimitedString(separator PairDelimiter) string {
   220  	var serializedPairs []string
   221  
   222  	// For each key, value pair, convert it into a "key=value;" pair and
   223  	// continue appending to the output string for each pair
   224  	for k, v := range ev {
   225  		if k != "" {
   226  			var pair []rune
   227  			pair = append(pair, quoteDelimiter)
   228  			pair = append(pair, []rune(escapeString(k, separator))...)
   229  			pair = append(pair, quoteDelimiter)
   230  			pair = append(pair, valueDelimiter)
   231  			pair = append(pair, quoteDelimiter)
   232  			pair = append(pair, []rune(escapeString(v, separator))...)
   233  			pair = append(pair, quoteDelimiter)
   234  
   235  			serializedPairs = append(serializedPairs, string(pair))
   236  		}
   237  	}
   238  	return strings.Join(serializedPairs, string(separator))
   239  }
   240  
   241  // isToken returns whether a string is a special token that would need to be
   242  // escaped
   243  func isPairDelimiter(c rune, delimiter PairDelimiter) bool {
   244  	switch c {
   245  	case delimiter,
   246  		valueDelimiter,
   247  		quoteDelimiter,
   248  		escapeDelimiter:
   249  		return true
   250  	}
   251  	return false
   252  }
   253  
   254  // escapeString takes an string and escapes any special characters so that the
   255  // string can be serialized properly. The user must supply the delimiter used
   256  // to separate key-value pairs.
   257  func escapeString(s string, delimiter PairDelimiter) string {
   258  	var escaped []rune
   259  	for _, c := range s {
   260  		if isPairDelimiter(c, delimiter) {
   261  			escaped = append(escaped, escapeDelimiter)
   262  		}
   263  		escaped = append(escaped, c)
   264  	}
   265  	return string(escaped)
   266  }