github.com/supabase/cli@v1.168.1/internal/utils/parser/state.go (about)

     1  package parser
     2  
     3  import (
     4  	"bytes"
     5  	"strings"
     6  	"unicode"
     7  	"unicode/utf8"
     8  )
     9  
// Keywords delimiting a SQL function body written as BEGIN ATOMIC ... END.
const (
	// Only ATOMIC is matched when looking for the start of the body: omitting
	// BEGIN allows arbitrary whitespace between the BEGIN and ATOMIC keywords.
	// Detection can fail if ATOMIC is used as a column name, because it is not
	// a reserved keyword:
	// https://www.postgresql.org/docs/current/sql-keywords-appendix.html
	BEGIN_ATOMIC = "ATOMIC"
	END_ATOMIC   = "END"
)
    17  
// State is one node of the state machine that walks the input rune by rune.
type State interface {
	// Next handles the rune r and returns the state that handles the rune
	// after it. The implementations in this file treat data as the input read
	// so far, ending with r.
	// Return nil to emit token
	Next(r rune, data []byte) State
}
    22  
    23  // Initial state: ready to parse next token
    24  type ReadyState struct{}
    25  
    26  func (s *ReadyState) Next(r rune, data []byte) State {
    27  	switch r {
    28  	case '$':
    29  		offset := len(data) - utf8.RuneLen(r)
    30  		return &TagState{offset: offset}
    31  	case '\'':
    32  		fallthrough
    33  	case '"':
    34  		return &QuoteState{delimiter: r}
    35  	case '-':
    36  		return &CommentState{}
    37  	case '/':
    38  		return &BlockState{}
    39  	case '\\':
    40  		return &EscapeState{}
    41  	case ';':
    42  		// Emit token
    43  		return nil
    44  	case '(':
    45  		return &AtomicState{prev: s, delimiter: []byte{')'}}
    46  	case 'c':
    47  		fallthrough
    48  	case 'C':
    49  		offset := len(data) - len(BEGIN_ATOMIC)
    50  		if offset >= 0 && strings.ToUpper(string(data[offset:])) == BEGIN_ATOMIC {
    51  			return &AtomicState{prev: s, delimiter: []byte(END_ATOMIC)}
    52  		}
    53  	}
    54  	return s
    55  }
    56  
    57  // Opened a line comment
    58  type CommentState struct{}
    59  
    60  func (s *CommentState) Next(r rune, data []byte) State {
    61  	if r == '-' {
    62  		// No characters are escaped in comments, which is the same as dollar
    63  		return &DollarState{delimiter: []byte{'\n'}}
    64  	}
    65  	// Break out of comment state
    66  	state := &ReadyState{}
    67  	return state.Next(r, data)
    68  }
    69  
    70  // Opened a block comment
    71  type BlockState struct {
    72  	depth int
    73  }
    74  
    75  func (s *BlockState) Next(r rune, data []byte) State {
    76  	const open = "/*"
    77  	const close = "*/"
    78  	window := data[len(data)-2:]
    79  	if bytes.Equal(window, []byte(open)) {
    80  		s.depth += 1
    81  		return s
    82  	}
    83  	if s.depth == 0 {
    84  		// Break out of block state
    85  		state := &ReadyState{}
    86  		return state.Next(r, data)
    87  	}
    88  	if bytes.Equal(window, []byte(close)) {
    89  		s.depth -= 1
    90  		if s.depth == 0 {
    91  			return &ReadyState{}
    92  		}
    93  	}
    94  	return s
    95  }
    96  
    97  // Opened a single quote ' or double quote "
    98  type QuoteState struct {
    99  	delimiter rune
   100  	escape    bool
   101  }
   102  
   103  func (s *QuoteState) Next(r rune, data []byte) State {
   104  	if s.escape {
   105  		// Preserve escaped quote ''
   106  		if r == s.delimiter {
   107  			s.escape = false
   108  			return s
   109  		}
   110  		// Break out of quote state
   111  		state := &ReadyState{}
   112  		return state.Next(r, data)
   113  	}
   114  	if r == s.delimiter {
   115  		s.escape = true
   116  	}
   117  	return s
   118  }
   119  
   120  // Opened a dollar quote, no characters are ever esacped.
   121  type DollarState struct {
   122  	delimiter []byte
   123  }
   124  
   125  func (s *DollarState) Next(r rune, data []byte) State {
   126  	window := data[len(data)-len(s.delimiter):]
   127  	if bytes.Equal(window, s.delimiter) {
   128  		// Break out of dollar state
   129  		return &ReadyState{}
   130  	}
   131  	return s
   132  }
   133  
   134  // Opened a tag, ie. $tag$
   135  type TagState struct {
   136  	offset int
   137  }
   138  
   139  func (s *TagState) Next(r rune, data []byte) State {
   140  	if r == '$' {
   141  		// Make a copy since the data slice may be overwritten
   142  		tag := data[s.offset:]
   143  		dollar := DollarState{
   144  			delimiter: make([]byte, len(tag)),
   145  		}
   146  		copy(dollar.delimiter, tag)
   147  		return &dollar
   148  	}
   149  	// Valid tag: https://www.postgresql.org/docs/current/sql-syntax-lexical.html
   150  	if unicode.IsLetter(r) || unicode.IsDigit(r) || r == '_' {
   151  		return s
   152  	}
   153  	// Break out of tag state
   154  	state := &ReadyState{}
   155  	return state.Next(r, data)
   156  }
   157  
   158  // Opened a \ escape
   159  type EscapeState struct{}
   160  
   161  func (s *EscapeState) Next(r rune, data []byte) State {
   162  	return &ReadyState{}
   163  }
   164  
   165  // Opened BEGIN ATOMIC function body
   166  type AtomicState struct {
   167  	prev      State
   168  	delimiter []byte
   169  }
   170  
   171  func (s *AtomicState) Next(r rune, data []byte) State {
   172  	// If we are in a quoted state, the current delimiter doesn't count.
   173  	if curr := s.prev.Next(r, data); curr != nil {
   174  		s.prev = curr
   175  	}
   176  	if _, ok := s.prev.(*ReadyState); ok {
   177  		window := data[len(data)-len(s.delimiter):]
   178  		// Treat delimiter as case insensitive
   179  		if strings.ToUpper(string(window)) == string(s.delimiter) {
   180  			return &ReadyState{}
   181  		}
   182  	}
   183  	return s
   184  }