github.com/Redstoneguy129/cli@v0.0.0-20230211220159-15dca4e91917/internal/utils/parser/state.go (about)

     1  package parser
     2  
     3  import (
     4  	"bytes"
     5  	"unicode"
     6  	"unicode/utf8"
     7  )
     8  
     9  type State interface {
    10  	// Return nil to emit token
    11  	Next(r rune, data []byte) State
    12  }
    13  
    14  // Initial state: ready to parse next token
    15  type ReadyState struct{}
    16  
    17  func (s *ReadyState) Next(r rune, data []byte) State {
    18  	switch r {
    19  	case '$':
    20  		offset := len(data) - utf8.RuneLen(r)
    21  		return &TagState{offset: offset}
    22  	case '\'':
    23  		fallthrough
    24  	case '"':
    25  		return &QuoteState{delimiter: r}
    26  	case '-':
    27  		return &CommentState{}
    28  	case '/':
    29  		return &BlockState{}
    30  	case '\\':
    31  		return &EscapeState{}
    32  	case ';':
    33  		// Emit token
    34  		return nil
    35  	}
    36  	return s
    37  }
    38  
    39  // Opened a line comment
    40  type CommentState struct{}
    41  
    42  func (s *CommentState) Next(r rune, data []byte) State {
    43  	if r == '-' {
    44  		// No characters are escaped in comments, which is the same as dollar
    45  		return &DollarState{delimiter: []byte{'\n'}}
    46  	}
    47  	// Break out of comment state
    48  	state := &ReadyState{}
    49  	return state.Next(r, data)
    50  }
    51  
    52  // Opened a block comment
    53  type BlockState struct {
    54  	depth int
    55  }
    56  
    57  func (s *BlockState) Next(r rune, data []byte) State {
    58  	const open = "/*"
    59  	const close = "*/"
    60  	window := data[len(data)-2:]
    61  	if bytes.Equal(window, []byte(open)) {
    62  		s.depth += 1
    63  		return s
    64  	}
    65  	if s.depth == 0 {
    66  		// Break out of block state
    67  		state := &ReadyState{}
    68  		return state.Next(r, data)
    69  	}
    70  	if bytes.Equal(window, []byte(close)) {
    71  		s.depth -= 1
    72  		if s.depth == 0 {
    73  			return &ReadyState{}
    74  		}
    75  	}
    76  	return s
    77  }
    78  
    79  // Opened a single quote ' or double quote "
    80  type QuoteState struct {
    81  	delimiter rune
    82  	escape    bool
    83  }
    84  
    85  func (s *QuoteState) Next(r rune, data []byte) State {
    86  	if s.escape {
    87  		// Preserve escaped quote ''
    88  		if r == s.delimiter {
    89  			s.escape = false
    90  			return s
    91  		}
    92  		// Break out of quote state
    93  		state := &ReadyState{}
    94  		return state.Next(r, data)
    95  	}
    96  	if r == s.delimiter {
    97  		s.escape = true
    98  	}
    99  	return s
   100  }
   101  
   102  // Opened a dollar quote, no characters are ever esacped.
   103  type DollarState struct {
   104  	delimiter []byte
   105  }
   106  
   107  func (s *DollarState) Next(r rune, data []byte) State {
   108  	window := data[len(data)-len(s.delimiter):]
   109  	if bytes.Equal(window, s.delimiter) {
   110  		// Break out of block state
   111  		return &ReadyState{}
   112  	}
   113  	return s
   114  }
   115  
   116  // Opened a tag, ie. $tag$
   117  type TagState struct {
   118  	offset int
   119  }
   120  
   121  func (s *TagState) Next(r rune, data []byte) State {
   122  	if r == '$' {
   123  		// Make a copy since the data slice may be overwritten
   124  		tag := data[s.offset:]
   125  		dollar := DollarState{
   126  			delimiter: make([]byte, len(tag)),
   127  		}
   128  		copy(dollar.delimiter, tag)
   129  		return &dollar
   130  	}
   131  	// Valid tag: https://www.postgresql.org/docs/current/sql-syntax-lexical.html
   132  	if unicode.IsLetter(r) || unicode.IsDigit(r) || r == '_' {
   133  		return s
   134  	}
   135  	// Break out of tag state
   136  	state := &ReadyState{}
   137  	return state.Next(r, data)
   138  }
   139  
   140  // Opened a \ escape
   141  type EscapeState struct{}
   142  
   143  func (s *EscapeState) Next(r rune, data []byte) State {
   144  	return &ReadyState{}
   145  }